[llvm] SelectionDAG: Improve expandFMINIMUM_FMAXIMUM (PR #137367)

YunQiang Su via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 8 22:58:34 PST 2025


https://github.com/wzssyqa updated https://github.com/llvm/llvm-project/pull/137367

>From 924946cee5b062e4a5298d28e97ac1fe6df732da Mon Sep 17 00:00:00 2001
From: YunQiang Su <syq at debian.org>
Date: Sat, 26 Apr 2025 01:15:56 +0800
Subject: [PATCH 1/6] expandFMINIMUM_FMAXIMUM: FMAXNUM/FMINNUM treat +0>-0

ISD::FMAXNUM and ISD::FMINNUM now treat +0.0 as greater than -0.0,
so set MinMaxMustRespectOrderedZero when the expansion uses them.
---
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 76278e99d4082..8f6e0011e5b01 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8745,8 +8745,6 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
   unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
   unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
 
-  // FIXME: We should probably define fminnum/fmaxnum variants with correct
-  // signed zero behavior.
   bool MinMaxMustRespectOrderedZero = false;
 
   if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
@@ -8754,6 +8752,7 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
     MinMaxMustRespectOrderedZero = true;
   } else if (isOperationLegalOrCustom(CompOpc, VT)) {
     MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
+    MinMaxMustRespectOrderedZero = true;
   } else {
     if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
       return DAG.UnrollVectorOp(N);

>From a0a2c79ab0bc48a749c0697c85f49002cb08fc54 Mon Sep 17 00:00:00 2001
From: YunQiang Su <yunqiang at isrc.iscas.ac.cn>
Date: Sat, 26 Apr 2025 12:31:17 +0800
Subject: [PATCH 2/6] Fix testcase

---
 llvm/test/CodeGen/NVPTX/math-intrins.ll       | 200 +++++++-----------
 .../test/CodeGen/PowerPC/fminimum-fmaximum.ll | 133 +++---------
 2 files changed, 110 insertions(+), 223 deletions(-)

diff --git a/llvm/test/CodeGen/NVPTX/math-intrins.ll b/llvm/test/CodeGen/NVPTX/math-intrins.ll
index 625c93c3f0a53..c11d0dae4c48c 100644
--- a/llvm/test/CodeGen/NVPTX/math-intrins.ll
+++ b/llvm/test/CodeGen/NVPTX/math-intrins.ll
@@ -681,22 +681,16 @@ define half @minimum_half(half %a, half %b) {
 define float @minimum_float(float %a, float %b) {
 ; CHECK-NOF16-LABEL: minimum_float(
 ; CHECK-NOF16:       {
-; CHECK-NOF16-NEXT:    .reg .pred %p<5>;
-; CHECK-NOF16-NEXT:    .reg .b32 %r<8>;
+; CHECK-NOF16-NEXT:    .reg .pred %p<2>;
+; CHECK-NOF16-NEXT:    .reg .b32 %f<5>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
-; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [minimum_float_param_0];
-; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [minimum_float_param_1];
-; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r2;
-; CHECK-NOF16-NEXT:    min.f32 %r3, %r1, %r2;
-; CHECK-NOF16-NEXT:    selp.f32 %r4, 0f7FC00000, %r3, %p1;
-; CHECK-NOF16-NEXT:    setp.eq.b32 %p2, %r1, -2147483648;
-; CHECK-NOF16-NEXT:    selp.f32 %r5, %r1, %r4, %p2;
-; CHECK-NOF16-NEXT:    setp.eq.b32 %p3, %r2, -2147483648;
-; CHECK-NOF16-NEXT:    selp.f32 %r6, %r2, %r5, %p3;
-; CHECK-NOF16-NEXT:    setp.eq.f32 %p4, %r4, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %r7, %r6, %r4, %p4;
-; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r7;
+; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [minimum_float_param_0];
+; CHECK-NOF16-NEXT:    ld.param.f32 %f2, [minimum_float_param_1];
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %f1, %f2;
+; CHECK-NOF16-NEXT:    min.f32 %f3, %f1, %f2;
+; CHECK-NOF16-NEXT:    selp.f32 %f4, 0f7FC00000, %f3, %p1;
+; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f4;
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: minimum_float(
@@ -727,19 +721,15 @@ define float @minimum_float(float %a, float %b) {
 define float @minimum_imm1(float %a) {
 ; CHECK-NOF16-LABEL: minimum_imm1(
 ; CHECK-NOF16:       {
-; CHECK-NOF16-NEXT:    .reg .pred %p<4>;
-; CHECK-NOF16-NEXT:    .reg .b32 %r<6>;
+; CHECK-NOF16-NEXT:    .reg .pred %p<2>;
+; CHECK-NOF16-NEXT:    .reg .b32 %f<4>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
-; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [minimum_imm1_param_0];
-; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
-; CHECK-NOF16-NEXT:    min.f32 %r2, %r1, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %r3, 0f7FC00000, %r2, %p1;
-; CHECK-NOF16-NEXT:    setp.eq.b32 %p2, %r1, -2147483648;
-; CHECK-NOF16-NEXT:    selp.f32 %r4, %r1, %r3, %p2;
-; CHECK-NOF16-NEXT:    setp.eq.f32 %p3, %r3, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %r5, %r4, %r3, %p3;
-; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r5;
+; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [minimum_imm1_param_0];
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %f1, %f1;
+; CHECK-NOF16-NEXT:    min.f32 %f2, %f1, 0f00000000;
+; CHECK-NOF16-NEXT:    selp.f32 %f3, 0f7FC00000, %f2, %p1;
+; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f3;
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: minimum_imm1(
@@ -768,19 +758,15 @@ define float @minimum_imm1(float %a) {
 define float @minimum_imm2(float %a) {
 ; CHECK-NOF16-LABEL: minimum_imm2(
 ; CHECK-NOF16:       {
-; CHECK-NOF16-NEXT:    .reg .pred %p<4>;
-; CHECK-NOF16-NEXT:    .reg .b32 %r<6>;
+; CHECK-NOF16-NEXT:    .reg .pred %p<2>;
+; CHECK-NOF16-NEXT:    .reg .b32 %f<4>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
-; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [minimum_imm2_param_0];
-; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
-; CHECK-NOF16-NEXT:    min.f32 %r2, %r1, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %r3, 0f7FC00000, %r2, %p1;
-; CHECK-NOF16-NEXT:    setp.eq.b32 %p2, %r1, -2147483648;
-; CHECK-NOF16-NEXT:    selp.f32 %r4, %r1, %r3, %p2;
-; CHECK-NOF16-NEXT:    setp.eq.f32 %p3, %r3, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %r5, %r4, %r3, %p3;
-; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r5;
+; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [minimum_imm2_param_0];
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %f1, %f1;
+; CHECK-NOF16-NEXT:    min.f32 %f2, %f1, 0f00000000;
+; CHECK-NOF16-NEXT:    selp.f32 %f3, 0f7FC00000, %f2, %p1;
+; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f3;
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: minimum_imm2(
@@ -809,22 +795,16 @@ define float @minimum_imm2(float %a) {
 define float @minimum_float_ftz(float %a, float %b) #1 {
 ; CHECK-NOF16-LABEL: minimum_float_ftz(
 ; CHECK-NOF16:       {
-; CHECK-NOF16-NEXT:    .reg .pred %p<5>;
-; CHECK-NOF16-NEXT:    .reg .b32 %r<8>;
+; CHECK-NOF16-NEXT:    .reg .pred %p<2>;
+; CHECK-NOF16-NEXT:    .reg .b32 %f<5>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
-; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [minimum_float_ftz_param_0];
-; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [minimum_float_ftz_param_1];
-; CHECK-NOF16-NEXT:    setp.nan.ftz.f32 %p1, %r1, %r2;
-; CHECK-NOF16-NEXT:    min.ftz.f32 %r3, %r1, %r2;
-; CHECK-NOF16-NEXT:    selp.f32 %r4, 0f7FC00000, %r3, %p1;
-; CHECK-NOF16-NEXT:    setp.eq.b32 %p2, %r1, -2147483648;
-; CHECK-NOF16-NEXT:    selp.f32 %r5, %r1, %r4, %p2;
-; CHECK-NOF16-NEXT:    setp.eq.b32 %p3, %r2, -2147483648;
-; CHECK-NOF16-NEXT:    selp.f32 %r6, %r2, %r5, %p3;
-; CHECK-NOF16-NEXT:    setp.eq.ftz.f32 %p4, %r4, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %r7, %r6, %r4, %p4;
-; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r7;
+; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [minimum_float_ftz_param_0];
+; CHECK-NOF16-NEXT:    ld.param.f32 %f2, [minimum_float_ftz_param_1];
+; CHECK-NOF16-NEXT:    setp.nan.ftz.f32 %p1, %f1, %f2;
+; CHECK-NOF16-NEXT:    min.ftz.f32 %f3, %f1, %f2;
+; CHECK-NOF16-NEXT:    selp.f32 %f4, 0f7FC00000, %f3, %p1;
+; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f4;
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: minimum_float_ftz(
@@ -855,22 +835,16 @@ define float @minimum_float_ftz(float %a, float %b) #1 {
 define double @minimum_double(double %a, double %b) {
 ; CHECK-LABEL: minimum_double(
 ; CHECK:       {
-; CHECK-NEXT:    .reg .pred %p<5>;
-; CHECK-NEXT:    .reg .b64 %rd<8>;
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b64 %fd<5>;
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
-; CHECK-NEXT:    ld.param.b64 %rd1, [minimum_double_param_0];
-; CHECK-NEXT:    ld.param.b64 %rd2, [minimum_double_param_1];
-; CHECK-NEXT:    setp.nan.f64 %p1, %rd1, %rd2;
-; CHECK-NEXT:    min.f64 %rd3, %rd1, %rd2;
-; CHECK-NEXT:    selp.f64 %rd4, 0d7FF8000000000000, %rd3, %p1;
-; CHECK-NEXT:    setp.eq.b64 %p2, %rd1, -9223372036854775808;
-; CHECK-NEXT:    selp.f64 %rd5, %rd1, %rd4, %p2;
-; CHECK-NEXT:    setp.eq.b64 %p3, %rd2, -9223372036854775808;
-; CHECK-NEXT:    selp.f64 %rd6, %rd2, %rd5, %p3;
-; CHECK-NEXT:    setp.eq.f64 %p4, %rd4, 0d0000000000000000;
-; CHECK-NEXT:    selp.f64 %rd7, %rd6, %rd4, %p4;
-; CHECK-NEXT:    st.param.b64 [func_retval0], %rd7;
+; CHECK-NEXT:    ld.param.f64 %fd1, [minimum_double_param_0];
+; CHECK-NEXT:    ld.param.f64 %fd2, [minimum_double_param_1];
+; CHECK-NEXT:    setp.nan.f64 %p1, %fd1, %fd2;
+; CHECK-NEXT:    min.f64 %fd3, %fd1, %fd2;
+; CHECK-NEXT:    selp.f64 %fd4, 0d7FF8000000000000, %fd3, %p1;
+; CHECK-NEXT:    st.param.f64 [func_retval0], %fd4;
 ; CHECK-NEXT:    ret;
   %x = call double @llvm.minimum.f64(double %a, double %b)
   ret double %x
@@ -1212,17 +1186,15 @@ define half @maximum_half(half %a, half %b) {
 define float @maximum_imm1(float %a) {
 ; CHECK-NOF16-LABEL: maximum_imm1(
 ; CHECK-NOF16:       {
-; CHECK-NOF16-NEXT:    .reg .pred %p<3>;
-; CHECK-NOF16-NEXT:    .reg .b32 %r<5>;
+; CHECK-NOF16-NEXT:    .reg .pred %p<2>;
+; CHECK-NOF16-NEXT:    .reg .b32 %f<4>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
-; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [maximum_imm1_param_0];
-; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
-; CHECK-NOF16-NEXT:    max.f32 %r2, %r1, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %r3, 0f7FC00000, %r2, %p1;
-; CHECK-NOF16-NEXT:    setp.eq.f32 %p2, %r3, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %r4, 0f00000000, %r3, %p2;
-; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [maximum_imm1_param_0];
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %f1, %f1;
+; CHECK-NOF16-NEXT:    max.f32 %f2, %f1, 0f00000000;
+; CHECK-NOF16-NEXT:    selp.f32 %f3, 0f7FC00000, %f2, %p1;
+; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f3;
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: maximum_imm1(
@@ -1251,17 +1223,15 @@ define float @maximum_imm1(float %a) {
 define float @maximum_imm2(float %a) {
 ; CHECK-NOF16-LABEL: maximum_imm2(
 ; CHECK-NOF16:       {
-; CHECK-NOF16-NEXT:    .reg .pred %p<3>;
-; CHECK-NOF16-NEXT:    .reg .b32 %r<5>;
+; CHECK-NOF16-NEXT:    .reg .pred %p<2>;
+; CHECK-NOF16-NEXT:    .reg .b32 %f<4>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
-; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [maximum_imm2_param_0];
-; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
-; CHECK-NOF16-NEXT:    max.f32 %r2, %r1, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %r3, 0f7FC00000, %r2, %p1;
-; CHECK-NOF16-NEXT:    setp.eq.f32 %p2, %r3, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %r4, 0f00000000, %r3, %p2;
-; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [maximum_imm2_param_0];
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %f1, %f1;
+; CHECK-NOF16-NEXT:    max.f32 %f2, %f1, 0f00000000;
+; CHECK-NOF16-NEXT:    selp.f32 %f3, 0f7FC00000, %f2, %p1;
+; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f3;
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: maximum_imm2(
@@ -1290,22 +1260,16 @@ define float @maximum_imm2(float %a) {
 define float @maximum_float(float %a, float %b) {
 ; CHECK-NOF16-LABEL: maximum_float(
 ; CHECK-NOF16:       {
-; CHECK-NOF16-NEXT:    .reg .pred %p<5>;
-; CHECK-NOF16-NEXT:    .reg .b32 %r<8>;
+; CHECK-NOF16-NEXT:    .reg .pred %p<2>;
+; CHECK-NOF16-NEXT:    .reg .b32 %f<5>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
-; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [maximum_float_param_0];
-; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [maximum_float_param_1];
-; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r2;
-; CHECK-NOF16-NEXT:    max.f32 %r3, %r1, %r2;
-; CHECK-NOF16-NEXT:    selp.f32 %r4, 0f7FC00000, %r3, %p1;
-; CHECK-NOF16-NEXT:    setp.eq.b32 %p2, %r1, 0;
-; CHECK-NOF16-NEXT:    selp.f32 %r5, %r1, %r4, %p2;
-; CHECK-NOF16-NEXT:    setp.eq.b32 %p3, %r2, 0;
-; CHECK-NOF16-NEXT:    selp.f32 %r6, %r2, %r5, %p3;
-; CHECK-NOF16-NEXT:    setp.eq.f32 %p4, %r4, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %r7, %r6, %r4, %p4;
-; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r7;
+; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [maximum_float_param_0];
+; CHECK-NOF16-NEXT:    ld.param.f32 %f2, [maximum_float_param_1];
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %f1, %f2;
+; CHECK-NOF16-NEXT:    max.f32 %f3, %f1, %f2;
+; CHECK-NOF16-NEXT:    selp.f32 %f4, 0f7FC00000, %f3, %p1;
+; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f4;
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: maximum_float(
@@ -1336,22 +1300,16 @@ define float @maximum_float(float %a, float %b) {
 define float @maximum_float_ftz(float %a, float %b) #1 {
 ; CHECK-NOF16-LABEL: maximum_float_ftz(
 ; CHECK-NOF16:       {
-; CHECK-NOF16-NEXT:    .reg .pred %p<5>;
-; CHECK-NOF16-NEXT:    .reg .b32 %r<8>;
+; CHECK-NOF16-NEXT:    .reg .pred %p<2>;
+; CHECK-NOF16-NEXT:    .reg .b32 %f<5>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
-; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [maximum_float_ftz_param_0];
-; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [maximum_float_ftz_param_1];
-; CHECK-NOF16-NEXT:    setp.nan.ftz.f32 %p1, %r1, %r2;
-; CHECK-NOF16-NEXT:    max.ftz.f32 %r3, %r1, %r2;
-; CHECK-NOF16-NEXT:    selp.f32 %r4, 0f7FC00000, %r3, %p1;
-; CHECK-NOF16-NEXT:    setp.eq.b32 %p2, %r1, 0;
-; CHECK-NOF16-NEXT:    selp.f32 %r5, %r1, %r4, %p2;
-; CHECK-NOF16-NEXT:    setp.eq.b32 %p3, %r2, 0;
-; CHECK-NOF16-NEXT:    selp.f32 %r6, %r2, %r5, %p3;
-; CHECK-NOF16-NEXT:    setp.eq.ftz.f32 %p4, %r4, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %r7, %r6, %r4, %p4;
-; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r7;
+; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [maximum_float_ftz_param_0];
+; CHECK-NOF16-NEXT:    ld.param.f32 %f2, [maximum_float_ftz_param_1];
+; CHECK-NOF16-NEXT:    setp.nan.ftz.f32 %p1, %f1, %f2;
+; CHECK-NOF16-NEXT:    max.ftz.f32 %f3, %f1, %f2;
+; CHECK-NOF16-NEXT:    selp.f32 %f4, 0f7FC00000, %f3, %p1;
+; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f4;
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: maximum_float_ftz(
@@ -1382,22 +1340,16 @@ define float @maximum_float_ftz(float %a, float %b) #1 {
 define double @maximum_double(double %a, double %b) {
 ; CHECK-LABEL: maximum_double(
 ; CHECK:       {
-; CHECK-NEXT:    .reg .pred %p<5>;
-; CHECK-NEXT:    .reg .b64 %rd<8>;
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b64 %fd<5>;
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
-; CHECK-NEXT:    ld.param.b64 %rd1, [maximum_double_param_0];
-; CHECK-NEXT:    ld.param.b64 %rd2, [maximum_double_param_1];
-; CHECK-NEXT:    setp.nan.f64 %p1, %rd1, %rd2;
-; CHECK-NEXT:    max.f64 %rd3, %rd1, %rd2;
-; CHECK-NEXT:    selp.f64 %rd4, 0d7FF8000000000000, %rd3, %p1;
-; CHECK-NEXT:    setp.eq.b64 %p2, %rd1, 0;
-; CHECK-NEXT:    selp.f64 %rd5, %rd1, %rd4, %p2;
-; CHECK-NEXT:    setp.eq.b64 %p3, %rd2, 0;
-; CHECK-NEXT:    selp.f64 %rd6, %rd2, %rd5, %p3;
-; CHECK-NEXT:    setp.eq.f64 %p4, %rd4, 0d0000000000000000;
-; CHECK-NEXT:    selp.f64 %rd7, %rd6, %rd4, %p4;
-; CHECK-NEXT:    st.param.b64 [func_retval0], %rd7;
+; CHECK-NEXT:    ld.param.f64 %fd1, [maximum_double_param_0];
+; CHECK-NEXT:    ld.param.f64 %fd2, [maximum_double_param_1];
+; CHECK-NEXT:    setp.nan.f64 %p1, %fd1, %fd2;
+; CHECK-NEXT:    max.f64 %fd3, %fd1, %fd2;
+; CHECK-NEXT:    selp.f64 %fd4, 0d7FF8000000000000, %fd3, %p1;
+; CHECK-NEXT:    st.param.f64 [func_retval0], %fd4;
 ; CHECK-NEXT:    ret;
   %x = call double @llvm.maximum.f64(double %a, double %b)
   ret double %x
diff --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
index a99c25a4e4479..39cf136e10d77 100644
--- a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
@@ -301,22 +301,13 @@ define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) {
 ; VSX-NEXT:    xvcmpeqsp 1, 35, 35
 ; VSX-NEXT:    xvcmpeqsp 2, 34, 34
 ; VSX-NEXT:    addis 3, 2, .LCPI4_0 at toc@ha
-; VSX-NEXT:    xxleqv 36, 36, 36
-; VSX-NEXT:    xvminsp 0, 34, 35
-; VSX-NEXT:    vslw 4, 4, 4
 ; VSX-NEXT:    addi 3, 3, .LCPI4_0 at toc@l
 ; VSX-NEXT:    xxlnor 1, 1, 1
 ; VSX-NEXT:    xxlnor 2, 2, 2
-; VSX-NEXT:    vcmpequw 5, 2, 4
+; VSX-NEXT:    xvminsp 0, 34, 35
 ; VSX-NEXT:    xxlor 1, 2, 1
 ; VSX-NEXT:    lxvd2x 2, 0, 3
-; VSX-NEXT:    xxsel 0, 0, 2, 1
-; VSX-NEXT:    xxlxor 2, 2, 2
-; VSX-NEXT:    xvcmpeqsp 2, 0, 2
-; VSX-NEXT:    xxsel 1, 0, 34, 37
-; VSX-NEXT:    vcmpequw 2, 3, 4
-; VSX-NEXT:    xxsel 1, 1, 35, 34
-; VSX-NEXT:    xxsel 34, 0, 1, 2
+; VSX-NEXT:    xxsel 34, 0, 2, 1
 ; VSX-NEXT:    blr
 ;
 ; AIX-LABEL: v4f32_minimum:
@@ -324,21 +315,12 @@ define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) {
 ; AIX-NEXT:    xvcmpeqsp 1, 35, 35
 ; AIX-NEXT:    xvcmpeqsp 2, 34, 34
 ; AIX-NEXT:    ld 3, L..C4(2) # %const.0
-; AIX-NEXT:    xxleqv 36, 36, 36
 ; AIX-NEXT:    xvminsp 0, 34, 35
-; AIX-NEXT:    vslw 4, 4, 4
 ; AIX-NEXT:    xxlnor 1, 1, 1
 ; AIX-NEXT:    xxlnor 2, 2, 2
-; AIX-NEXT:    vcmpequw 5, 2, 4
 ; AIX-NEXT:    xxlor 1, 2, 1
 ; AIX-NEXT:    lxvw4x 2, 0, 3
-; AIX-NEXT:    xxsel 0, 0, 2, 1
-; AIX-NEXT:    xxlxor 2, 2, 2
-; AIX-NEXT:    xvcmpeqsp 2, 0, 2
-; AIX-NEXT:    xxsel 1, 0, 34, 37
-; AIX-NEXT:    vcmpequw 2, 3, 4
-; AIX-NEXT:    xxsel 1, 1, 35, 34
-; AIX-NEXT:    xxsel 34, 0, 1, 2
+; AIX-NEXT:    xxsel 34, 0, 2, 1
 ; AIX-NEXT:    blr
 entry:
   %m = call <4 x float> @llvm.minimum.v4f32(<4 x float> %a, <4 x float> %b)
@@ -377,16 +359,9 @@ define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) {
 ; VSX-NEXT:    xxlnor 1, 1, 1
 ; VSX-NEXT:    xxlnor 2, 2, 2
 ; VSX-NEXT:    xvmaxsp 0, 34, 35
-; VSX-NEXT:    xxlxor 36, 36, 36
-; VSX-NEXT:    vcmpequw 5, 2, 4
 ; VSX-NEXT:    xxlor 1, 2, 1
 ; VSX-NEXT:    lxvd2x 2, 0, 3
-; VSX-NEXT:    xxsel 0, 0, 2, 1
-; VSX-NEXT:    xvcmpeqsp 2, 0, 36
-; VSX-NEXT:    xxsel 1, 0, 34, 37
-; VSX-NEXT:    vcmpequw 2, 3, 4
-; VSX-NEXT:    xxsel 1, 1, 35, 34
-; VSX-NEXT:    xxsel 34, 0, 1, 2
+; VSX-NEXT:    xxsel 34, 0, 2, 1
 ; VSX-NEXT:    blr
 ;
 ; AIX-LABEL: v4f32_maximum:
@@ -395,18 +370,11 @@ define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) {
 ; AIX-NEXT:    xvcmpeqsp 2, 34, 34
 ; AIX-NEXT:    ld 3, L..C5(2) # %const.0
 ; AIX-NEXT:    xvmaxsp 0, 34, 35
-; AIX-NEXT:    xxlxor 36, 36, 36
 ; AIX-NEXT:    xxlnor 1, 1, 1
 ; AIX-NEXT:    xxlnor 2, 2, 2
-; AIX-NEXT:    vcmpequw 5, 2, 4
 ; AIX-NEXT:    xxlor 1, 2, 1
 ; AIX-NEXT:    lxvw4x 2, 0, 3
-; AIX-NEXT:    xxsel 0, 0, 2, 1
-; AIX-NEXT:    xvcmpeqsp 2, 0, 36
-; AIX-NEXT:    xxsel 1, 0, 34, 37
-; AIX-NEXT:    vcmpequw 2, 3, 4
-; AIX-NEXT:    xxsel 1, 1, 35, 34
-; AIX-NEXT:    xxsel 34, 0, 1, 2
+; AIX-NEXT:    xxsel 34, 0, 2, 1
 ; AIX-NEXT:    blr
 entry:
   %m = call <4 x float> @llvm.maximum.v4f32(<4 x float> %a, <4 x float> %b)
@@ -493,47 +461,28 @@ define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) {
 ; VSX-LABEL: v2f64_minimum:
 ; VSX:       # %bb.0: # %entry
 ; VSX-NEXT:    addis 3, 2, .LCPI6_0 at toc@ha
-; VSX-NEXT:    xvcmpeqdp 36, 35, 35
-; VSX-NEXT:    xvcmpeqdp 37, 34, 34
-; VSX-NEXT:    addi 3, 3, .LCPI6_0 at toc@l
-; VSX-NEXT:    xxlnor 36, 36, 36
-; VSX-NEXT:    xxlnor 37, 37, 37
 ; VSX-NEXT:    xvmindp 0, 34, 35
+; VSX-NEXT:    xvcmpeqdp 35, 35, 35
+; VSX-NEXT:    addi 3, 3, .LCPI6_0 at toc@l
+; VSX-NEXT:    xvcmpeqdp 34, 34, 34
+; VSX-NEXT:    xxlnor 35, 35, 35
+; VSX-NEXT:    xxlnor 34, 34, 34
 ; VSX-NEXT:    lxvd2x 2, 0, 3
-; VSX-NEXT:    addis 3, 2, .LCPI6_1 at toc@ha
-; VSX-NEXT:    xxlor 1, 37, 36
-; VSX-NEXT:    addi 3, 3, .LCPI6_1 at toc@l
-; VSX-NEXT:    lxvd2x 36, 0, 3
-; VSX-NEXT:    vcmpequd 5, 2, 4
-; VSX-NEXT:    xxsel 0, 0, 2, 1
-; VSX-NEXT:    xxlxor 2, 2, 2
-; VSX-NEXT:    xxsel 1, 0, 34, 37
-; VSX-NEXT:    vcmpequd 2, 3, 4
-; VSX-NEXT:    xxsel 1, 1, 35, 34
-; VSX-NEXT:    xvcmpeqdp 34, 0, 2
-; VSX-NEXT:    xxsel 34, 0, 1, 34
+; VSX-NEXT:    xxlor 1, 34, 35
+; VSX-NEXT:    xxsel 34, 0, 2, 1
 ; VSX-NEXT:    blr
 ;
 ; AIX-LABEL: v2f64_minimum:
 ; AIX:       # %bb.0: # %entry
 ; AIX-NEXT:    ld 3, L..C6(2) # %const.0
-; AIX-NEXT:    xvcmpeqdp 36, 35, 35
-; AIX-NEXT:    xvcmpeqdp 37, 34, 34
-; AIX-NEXT:    lxvd2x 2, 0, 3
-; AIX-NEXT:    ld 3, L..C7(2) # %const.1
-; AIX-NEXT:    xxlnor 36, 36, 36
-; AIX-NEXT:    xxlnor 37, 37, 37
 ; AIX-NEXT:    xvmindp 0, 34, 35
-; AIX-NEXT:    xxlor 1, 37, 36
-; AIX-NEXT:    lxvd2x 36, 0, 3
-; AIX-NEXT:    vcmpequd 5, 2, 4
-; AIX-NEXT:    xxsel 0, 0, 2, 1
-; AIX-NEXT:    xxlxor 2, 2, 2
-; AIX-NEXT:    xxsel 1, 0, 34, 37
-; AIX-NEXT:    vcmpequd 2, 3, 4
-; AIX-NEXT:    xxsel 1, 1, 35, 34
-; AIX-NEXT:    xvcmpeqdp 34, 0, 2
-; AIX-NEXT:    xxsel 34, 0, 1, 34
+; AIX-NEXT:    xvcmpeqdp 35, 35, 35
+; AIX-NEXT:    lxvd2x 2, 0, 3
+; AIX-NEXT:    xvcmpeqdp 34, 34, 34
+; AIX-NEXT:    xxlnor 35, 35, 35
+; AIX-NEXT:    xxlnor 34, 34, 34
+; AIX-NEXT:    xxlor 1, 34, 35
+; AIX-NEXT:    xxsel 34, 0, 2, 1
 ; AIX-NEXT:    blr
 entry:
   %m = call <2 x double> @llvm.minimum.v2f64(<2 x double> %a, <2 x double> %b)
@@ -618,42 +567,28 @@ define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) {
 ; VSX-LABEL: v2f64_maximum:
 ; VSX:       # %bb.0: # %entry
 ; VSX-NEXT:    addis 3, 2, .LCPI7_0 at toc@ha
-; VSX-NEXT:    xvcmpeqdp 36, 35, 35
-; VSX-NEXT:    xvcmpeqdp 37, 34, 34
-; VSX-NEXT:    addi 3, 3, .LCPI7_0 at toc@l
-; VSX-NEXT:    xxlnor 36, 36, 36
-; VSX-NEXT:    xxlnor 37, 37, 37
 ; VSX-NEXT:    xvmaxdp 0, 34, 35
+; VSX-NEXT:    xvcmpeqdp 35, 35, 35
+; VSX-NEXT:    addi 3, 3, .LCPI7_0 at toc@l
+; VSX-NEXT:    xvcmpeqdp 34, 34, 34
+; VSX-NEXT:    xxlnor 35, 35, 35
+; VSX-NEXT:    xxlnor 34, 34, 34
 ; VSX-NEXT:    lxvd2x 2, 0, 3
-; VSX-NEXT:    xxlor 1, 37, 36
-; VSX-NEXT:    xxlxor 36, 36, 36
-; VSX-NEXT:    vcmpequd 5, 2, 4
-; VSX-NEXT:    xxsel 0, 0, 2, 1
-; VSX-NEXT:    xxsel 1, 0, 34, 37
-; VSX-NEXT:    vcmpequd 2, 3, 4
-; VSX-NEXT:    xxsel 1, 1, 35, 34
-; VSX-NEXT:    xvcmpeqdp 34, 0, 36
-; VSX-NEXT:    xxsel 34, 0, 1, 34
+; VSX-NEXT:    xxlor 1, 34, 35
+; VSX-NEXT:    xxsel 34, 0, 2, 1
 ; VSX-NEXT:    blr
 ;
 ; AIX-LABEL: v2f64_maximum:
 ; AIX:       # %bb.0: # %entry
-; AIX-NEXT:    ld 3, L..C8(2) # %const.0
-; AIX-NEXT:    xvcmpeqdp 36, 35, 35
-; AIX-NEXT:    xvcmpeqdp 37, 34, 34
-; AIX-NEXT:    lxvd2x 2, 0, 3
-; AIX-NEXT:    xxlnor 36, 36, 36
-; AIX-NEXT:    xxlnor 37, 37, 37
+; AIX-NEXT:    ld 3, L..C7(2) # %const.0
 ; AIX-NEXT:    xvmaxdp 0, 34, 35
-; AIX-NEXT:    xxlor 1, 37, 36
-; AIX-NEXT:    xxlxor 36, 36, 36
-; AIX-NEXT:    vcmpequd 5, 2, 4
-; AIX-NEXT:    xxsel 0, 0, 2, 1
-; AIX-NEXT:    xxsel 1, 0, 34, 37
-; AIX-NEXT:    vcmpequd 2, 3, 4
-; AIX-NEXT:    xxsel 1, 1, 35, 34
-; AIX-NEXT:    xvcmpeqdp 34, 0, 36
-; AIX-NEXT:    xxsel 34, 0, 1, 34
+; AIX-NEXT:    xvcmpeqdp 35, 35, 35
+; AIX-NEXT:    lxvd2x 2, 0, 3
+; AIX-NEXT:    xvcmpeqdp 34, 34, 34
+; AIX-NEXT:    xxlnor 35, 35, 35
+; AIX-NEXT:    xxlnor 34, 34, 34
+; AIX-NEXT:    xxlor 1, 34, 35
+; AIX-NEXT:    xxsel 34, 0, 2, 1
 ; AIX-NEXT:    blr
 entry:
   %m = call <2 x double> @llvm.maximum.v2f64(<2 x double> %a, <2 x double> %b)

>From 9018fb93fd7bbd5059c258b913fd465105025d93 Mon Sep 17 00:00:00 2001
From: YunQiang Su <yunqiang at isrc.iscas.ac.cn>
Date: Thu, 4 Dec 2025 16:57:26 +0800
Subject: [PATCH 3/6] Update nvptx tests

---
 llvm/test/CodeGen/NVPTX/math-intrins.ll | 132 ++++++++++++------------
 1 file changed, 66 insertions(+), 66 deletions(-)

diff --git a/llvm/test/CodeGen/NVPTX/math-intrins.ll b/llvm/test/CodeGen/NVPTX/math-intrins.ll
index c11d0dae4c48c..6fb0112631af7 100644
--- a/llvm/test/CodeGen/NVPTX/math-intrins.ll
+++ b/llvm/test/CodeGen/NVPTX/math-intrins.ll
@@ -682,15 +682,15 @@ define float @minimum_float(float %a, float %b) {
 ; CHECK-NOF16-LABEL: minimum_float(
 ; CHECK-NOF16:       {
 ; CHECK-NOF16-NEXT:    .reg .pred %p<2>;
-; CHECK-NOF16-NEXT:    .reg .b32 %f<5>;
+; CHECK-NOF16-NEXT:    .reg .b32 %r<5>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
-; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [minimum_float_param_0];
-; CHECK-NOF16-NEXT:    ld.param.f32 %f2, [minimum_float_param_1];
-; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %f1, %f2;
-; CHECK-NOF16-NEXT:    min.f32 %f3, %f1, %f2;
-; CHECK-NOF16-NEXT:    selp.f32 %f4, 0f7FC00000, %f3, %p1;
-; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f4;
+; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [minimum_float_param_0];
+; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [minimum_float_param_1];
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r2;
+; CHECK-NOF16-NEXT:    min.f32 %r3, %r1, %r2;
+; CHECK-NOF16-NEXT:    selp.f32 %r4, 0f7FC00000, %r3, %p1;
+; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r4;
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: minimum_float(
@@ -722,14 +722,14 @@ define float @minimum_imm1(float %a) {
 ; CHECK-NOF16-LABEL: minimum_imm1(
 ; CHECK-NOF16:       {
 ; CHECK-NOF16-NEXT:    .reg .pred %p<2>;
-; CHECK-NOF16-NEXT:    .reg .b32 %f<4>;
+; CHECK-NOF16-NEXT:    .reg .b32 %r<4>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
-; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [minimum_imm1_param_0];
-; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %f1, %f1;
-; CHECK-NOF16-NEXT:    min.f32 %f2, %f1, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %f3, 0f7FC00000, %f2, %p1;
-; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f3;
+; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [minimum_imm1_param_0];
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
+; CHECK-NOF16-NEXT:    min.f32 %r2, %r1, 0f00000000;
+; CHECK-NOF16-NEXT:    selp.f32 %r3, 0f7FC00000, %r2, %p1;
+; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: minimum_imm1(
@@ -759,14 +759,14 @@ define float @minimum_imm2(float %a) {
 ; CHECK-NOF16-LABEL: minimum_imm2(
 ; CHECK-NOF16:       {
 ; CHECK-NOF16-NEXT:    .reg .pred %p<2>;
-; CHECK-NOF16-NEXT:    .reg .b32 %f<4>;
+; CHECK-NOF16-NEXT:    .reg .b32 %r<4>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
-; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [minimum_imm2_param_0];
-; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %f1, %f1;
-; CHECK-NOF16-NEXT:    min.f32 %f2, %f1, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %f3, 0f7FC00000, %f2, %p1;
-; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f3;
+; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [minimum_imm2_param_0];
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
+; CHECK-NOF16-NEXT:    min.f32 %r2, %r1, 0f00000000;
+; CHECK-NOF16-NEXT:    selp.f32 %r3, 0f7FC00000, %r2, %p1;
+; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: minimum_imm2(
@@ -796,15 +796,15 @@ define float @minimum_float_ftz(float %a, float %b) #1 {
 ; CHECK-NOF16-LABEL: minimum_float_ftz(
 ; CHECK-NOF16:       {
 ; CHECK-NOF16-NEXT:    .reg .pred %p<2>;
-; CHECK-NOF16-NEXT:    .reg .b32 %f<5>;
+; CHECK-NOF16-NEXT:    .reg .b32 %r<5>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
-; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [minimum_float_ftz_param_0];
-; CHECK-NOF16-NEXT:    ld.param.f32 %f2, [minimum_float_ftz_param_1];
-; CHECK-NOF16-NEXT:    setp.nan.ftz.f32 %p1, %f1, %f2;
-; CHECK-NOF16-NEXT:    min.ftz.f32 %f3, %f1, %f2;
-; CHECK-NOF16-NEXT:    selp.f32 %f4, 0f7FC00000, %f3, %p1;
-; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f4;
+; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [minimum_float_ftz_param_0];
+; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [minimum_float_ftz_param_1];
+; CHECK-NOF16-NEXT:    setp.nan.ftz.f32 %p1, %r1, %r2;
+; CHECK-NOF16-NEXT:    min.ftz.f32 %r3, %r1, %r2;
+; CHECK-NOF16-NEXT:    selp.f32 %r4, 0f7FC00000, %r3, %p1;
+; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r4;
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: minimum_float_ftz(
@@ -836,15 +836,15 @@ define double @minimum_double(double %a, double %b) {
 ; CHECK-LABEL: minimum_double(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .pred %p<2>;
-; CHECK-NEXT:    .reg .b64 %fd<5>;
+; CHECK-NEXT:    .reg .b64 %rd<5>;
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
-; CHECK-NEXT:    ld.param.f64 %fd1, [minimum_double_param_0];
-; CHECK-NEXT:    ld.param.f64 %fd2, [minimum_double_param_1];
-; CHECK-NEXT:    setp.nan.f64 %p1, %fd1, %fd2;
-; CHECK-NEXT:    min.f64 %fd3, %fd1, %fd2;
-; CHECK-NEXT:    selp.f64 %fd4, 0d7FF8000000000000, %fd3, %p1;
-; CHECK-NEXT:    st.param.f64 [func_retval0], %fd4;
+; CHECK-NEXT:    ld.param.b64 %rd1, [minimum_double_param_0];
+; CHECK-NEXT:    ld.param.b64 %rd2, [minimum_double_param_1];
+; CHECK-NEXT:    setp.nan.f64 %p1, %rd1, %rd2;
+; CHECK-NEXT:    min.f64 %rd3, %rd1, %rd2;
+; CHECK-NEXT:    selp.f64 %rd4, 0d7FF8000000000000, %rd3, %p1;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd4;
 ; CHECK-NEXT:    ret;
   %x = call double @llvm.minimum.f64(double %a, double %b)
   ret double %x
@@ -1187,14 +1187,14 @@ define float @maximum_imm1(float %a) {
 ; CHECK-NOF16-LABEL: maximum_imm1(
 ; CHECK-NOF16:       {
 ; CHECK-NOF16-NEXT:    .reg .pred %p<2>;
-; CHECK-NOF16-NEXT:    .reg .b32 %f<4>;
+; CHECK-NOF16-NEXT:    .reg .b32 %r<4>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
-; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [maximum_imm1_param_0];
-; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %f1, %f1;
-; CHECK-NOF16-NEXT:    max.f32 %f2, %f1, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %f3, 0f7FC00000, %f2, %p1;
-; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f3;
+; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [maximum_imm1_param_0];
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
+; CHECK-NOF16-NEXT:    max.f32 %r2, %r1, 0f00000000;
+; CHECK-NOF16-NEXT:    selp.f32 %r3, 0f7FC00000, %r2, %p1;
+; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: maximum_imm1(
@@ -1224,14 +1224,14 @@ define float @maximum_imm2(float %a) {
 ; CHECK-NOF16-LABEL: maximum_imm2(
 ; CHECK-NOF16:       {
 ; CHECK-NOF16-NEXT:    .reg .pred %p<2>;
-; CHECK-NOF16-NEXT:    .reg .b32 %f<4>;
+; CHECK-NOF16-NEXT:    .reg .b32 %r<4>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
-; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [maximum_imm2_param_0];
-; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %f1, %f1;
-; CHECK-NOF16-NEXT:    max.f32 %f2, %f1, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %f3, 0f7FC00000, %f2, %p1;
-; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f3;
+; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [maximum_imm2_param_0];
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
+; CHECK-NOF16-NEXT:    max.f32 %r2, %r1, 0f00000000;
+; CHECK-NOF16-NEXT:    selp.f32 %r3, 0f7FC00000, %r2, %p1;
+; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: maximum_imm2(
@@ -1261,15 +1261,15 @@ define float @maximum_float(float %a, float %b) {
 ; CHECK-NOF16-LABEL: maximum_float(
 ; CHECK-NOF16:       {
 ; CHECK-NOF16-NEXT:    .reg .pred %p<2>;
-; CHECK-NOF16-NEXT:    .reg .b32 %f<5>;
+; CHECK-NOF16-NEXT:    .reg .b32 %r<5>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
-; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [maximum_float_param_0];
-; CHECK-NOF16-NEXT:    ld.param.f32 %f2, [maximum_float_param_1];
-; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %f1, %f2;
-; CHECK-NOF16-NEXT:    max.f32 %f3, %f1, %f2;
-; CHECK-NOF16-NEXT:    selp.f32 %f4, 0f7FC00000, %f3, %p1;
-; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f4;
+; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [maximum_float_param_0];
+; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [maximum_float_param_1];
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r2;
+; CHECK-NOF16-NEXT:    max.f32 %r3, %r1, %r2;
+; CHECK-NOF16-NEXT:    selp.f32 %r4, 0f7FC00000, %r3, %p1;
+; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r4;
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: maximum_float(
@@ -1301,15 +1301,15 @@ define float @maximum_float_ftz(float %a, float %b) #1 {
 ; CHECK-NOF16-LABEL: maximum_float_ftz(
 ; CHECK-NOF16:       {
 ; CHECK-NOF16-NEXT:    .reg .pred %p<2>;
-; CHECK-NOF16-NEXT:    .reg .b32 %f<5>;
+; CHECK-NOF16-NEXT:    .reg .b32 %r<5>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
-; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [maximum_float_ftz_param_0];
-; CHECK-NOF16-NEXT:    ld.param.f32 %f2, [maximum_float_ftz_param_1];
-; CHECK-NOF16-NEXT:    setp.nan.ftz.f32 %p1, %f1, %f2;
-; CHECK-NOF16-NEXT:    max.ftz.f32 %f3, %f1, %f2;
-; CHECK-NOF16-NEXT:    selp.f32 %f4, 0f7FC00000, %f3, %p1;
-; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f4;
+; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [maximum_float_ftz_param_0];
+; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [maximum_float_ftz_param_1];
+; CHECK-NOF16-NEXT:    setp.nan.ftz.f32 %p1, %r1, %r2;
+; CHECK-NOF16-NEXT:    max.ftz.f32 %r3, %r1, %r2;
+; CHECK-NOF16-NEXT:    selp.f32 %r4, 0f7FC00000, %r3, %p1;
+; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r4;
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: maximum_float_ftz(
@@ -1341,15 +1341,15 @@ define double @maximum_double(double %a, double %b) {
 ; CHECK-LABEL: maximum_double(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .pred %p<2>;
-; CHECK-NEXT:    .reg .b64 %fd<5>;
+; CHECK-NEXT:    .reg .b64 %rd<5>;
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
-; CHECK-NEXT:    ld.param.f64 %fd1, [maximum_double_param_0];
-; CHECK-NEXT:    ld.param.f64 %fd2, [maximum_double_param_1];
-; CHECK-NEXT:    setp.nan.f64 %p1, %fd1, %fd2;
-; CHECK-NEXT:    max.f64 %fd3, %fd1, %fd2;
-; CHECK-NEXT:    selp.f64 %fd4, 0d7FF8000000000000, %fd3, %p1;
-; CHECK-NEXT:    st.param.f64 [func_retval0], %fd4;
+; CHECK-NEXT:    ld.param.b64 %rd1, [maximum_double_param_0];
+; CHECK-NEXT:    ld.param.b64 %rd2, [maximum_double_param_1];
+; CHECK-NEXT:    setp.nan.f64 %p1, %rd1, %rd2;
+; CHECK-NEXT:    max.f64 %rd3, %rd1, %rd2;
+; CHECK-NEXT:    selp.f64 %rd4, 0d7FF8000000000000, %rd3, %p1;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd4;
 ; CHECK-NEXT:    ret;
   %x = call double @llvm.maximum.f64(double %a, double %b)
   ret double %x

>From fe36e4fec08defeebadc1c38d6f694c39c26ca6d Mon Sep 17 00:00:00 2001
From: YunQiang Su <yunqiang at isrc.iscas.ac.cn>
Date: Tue, 9 Dec 2025 10:36:53 +0800
Subject: [PATCH 4/6] Improve expandFMINIMUM_FMAXIMUM

---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 110 +++++++++++-------
 1 file changed, 68 insertions(+), 42 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 85669609f0c68..29b0bb38ed5b5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8729,6 +8729,28 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
   return SDValue();
 }
 
+static SDValue determineFloatSign(SDValue N, SelectionDAG &DAG, bool Postive) {
+  SDLoc DL(N);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  EVT VT = N->getValueType(0);
+  EVT IntVT = VT.changeTypeToInteger();
+  SDValue NTrunc = N;
+  if (!TLI.isTypeLegal(IntVT)) {
+    EVT FloatVT = VT.changeElementType(MVT::f32);
+    IntVT = VT.changeElementType(MVT::i32);
+    NTrunc = DAG.getNode(ISD::FP_ROUND, DL, FloatVT, N,
+                         DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
+  }
+  EVT CCVT =
+      TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), IntVT);
+
+  // FIXME: how to support 16bit/8bit targets?
+  SDValue IntN = DAG.getNode(ISD::BITCAST, DL, IntVT, NTrunc);
+
+  return DAG.getSetCC(DL, CCVT, IntN, DAG.getConstant(0, DL, IntVT),
+                      Postive ? ISD::SETGE : ISD::SETLT);
+}
+
 SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
                                                 SelectionDAG &DAG) const {
   if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
@@ -8742,57 +8764,61 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
   EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
   bool IsMax = Opc == ISD::FMAXIMUM;
   SDNodeFlags Flags = N->getFlags();
+  bool LHSNotZero = DAG.isKnownNeverZeroFloat(LHS);
+  bool RHSNotZero = DAG.isKnownNeverZeroFloat(RHS);
 
   // First, implement comparison not propagating NaN. If no native fmin or fmax
   // available, use plain select with setcc instead.
   SDValue MinMax;
-  unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
-  unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
-
-  bool MinMaxMustRespectOrderedZero = false;
-
-  if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
-    MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
-    MinMaxMustRespectOrderedZero = true;
-  } else if (isOperationLegalOrCustom(CompOpc, VT)) {
-    MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
-    MinMaxMustRespectOrderedZero = true;
+  unsigned MinMaxOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
+  unsigned MinMaxOpcNum = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
+  unsigned MinMaxOpcNum2019 = IsMax ? ISD::FMAXIMUMNUM : ISD::FMINIMUMNUM;
+  unsigned MinMaxOpc = 0;
+
+  bool IsZeroOrdered = false;
+
+  if (isOperationLegal(MinMaxOpcIeee, VT))
+    MinMaxOpc = MinMaxOpcIeee;
+  else if (isOperationLegal(MinMaxOpcNum, VT))
+    MinMaxOpc = MinMaxOpcNum;
+  else if (isOperationLegal(MinMaxOpcNum2019, VT))
+    MinMaxOpc = MinMaxOpcNum2019;
+  else if (isOperationCustom(MinMaxOpcIeee, VT))
+    MinMaxOpc = MinMaxOpcIeee;
+  else if (isOperationCustom(MinMaxOpcNum, VT))
+    MinMaxOpc = MinMaxOpcNum;
+  else if (isOperationCustom(MinMaxOpcNum2019, VT))
+    MinMaxOpc = MinMaxOpcNum2019;
+  if (MinMaxOpc) {
+    if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS))
+      LHS = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, RHS, RHS, ISD::SETUO),
+                          RHS, LHS, Flags);
+    if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS))
+      RHS = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, LHS, ISD::SETUO),
+                          LHS, RHS, Flags);
+    MinMax = DAG.getNode(MinMaxOpc, DL, VT, LHS, RHS, Flags);
+    IsZeroOrdered = true;
   } else {
-    if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
-      return DAG.UnrollVectorOp(N);
-
-    // NaN (if exists) will be propagated later, so orderness doesn't matter.
-    SDValue Compare =
-        DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
-    MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
+    if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS))
+      LHS = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, RHS, RHS, ISD::SETUO),
+                          RHS, LHS, Flags);
+    MinMax = DAG.getSelectCC(DL, LHS, RHS, LHS, RHS,
+                             IsMax ? ISD::SETUGT : ISD::SETULT);
   }
 
-  // Propagate any NaN of both operands
-  if (!N->getFlags().hasNoNaNs() &&
-      (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
-    ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
-                                        APFloat::getNaN(VT.getFltSemantics()));
-    MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
-                           DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
-  }
+  // TODO: We need quiet sNaN if strictfp.
 
   // fminimum/fmaximum requires -0.0 less than +0.0
-  if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
-      !DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
-    SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
-                                  DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
-    SDValue TestZero =
-        DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
-    SDValue LCmp = DAG.getSelect(
-        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
-        MinMax, Flags);
-    SDValue RCmp = DAG.getSelect(
-        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
-        LCmp, Flags);
-    MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
-  }
-
-  return MinMax;
+  if (IsZeroOrdered || Flags.hasNoSignedZeros() || LHSNotZero || RHSNotZero) {
+    return MinMax;
+  }
+  SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
+                                DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
+
+  SDValue RetZero =
+      DAG.getSelect(DL, VT, determineFloatSign(LHS, DAG, IsMax ? true : false),
+                    LHS, MinMax, Flags);
+  return DAG.getSelect(DL, VT, IsZero, RetZero, MinMax, Flags);
 }
 
 SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,

>From db19f15345045f2552bf8dceed449e2453b27580 Mon Sep 17 00:00:00 2001
From: YunQiang Su <yunqiang at isrc.iscas.ac.cn>
Date: Tue, 9 Dec 2025 13:39:35 +0800
Subject: [PATCH 5/6] Unroll for vector without vselect: v1f64 needs it

---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |    7 +-
 .../test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll | 1052 ++--
 .../CodeGen/AMDGPU/a-v-global-atomicrmw.ll    |  804 +--
 .../AMDGPU/fcanonicalize-elimination.ll       | 1402 ++++++
 llvm/test/CodeGen/AMDGPU/fmaximum.ll          | 1073 ++--
 llvm/test/CodeGen/AMDGPU/fmaximum3.ll         | 3002 +++++++----
 llvm/test/CodeGen/AMDGPU/fminimum.ll          | 1073 ++--
 llvm/test/CodeGen/AMDGPU/fminimum3.ll         | 3002 +++++++----
 llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll |  326 +-
 llvm/test/CodeGen/AMDGPU/fract-match.ll       |   56 +-
 llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll  | 3047 ++++++-----
 llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll  | 2025 +++++---
 llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll  | 3733 ++++++++------
 llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll  | 2517 ++++++----
 llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll  | 2025 +++++---
 llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll  | 3733 ++++++++------
 .../AMDGPU/uniform-vgpr-to-sgpr-return.ll     |  106 +-
 .../CodeGen/AMDGPU/vector-reduce-fmaximum.ll  | 4462 +++++++++++------
 .../CodeGen/AMDGPU/vector-reduce-fminimum.ll  | 4462 +++++++++++------
 llvm/test/CodeGen/Mips/fp-maximum-minimum.ll  |  976 ++++
 llvm/test/CodeGen/NVPTX/bf16-instructions.ll  |   86 +-
 llvm/test/CodeGen/NVPTX/math-intrins.ll       |  420 +-
 .../CodeGen/PowerPC/fminimum-fmaximum-f128.ll |  110 +-
 .../test/CodeGen/PowerPC/fminimum-fmaximum.ll |  729 ++-
 llvm/test/CodeGen/X86/fminimum-fmaximum.ll    |  298 +-
 25 files changed, 26004 insertions(+), 14522 deletions(-)
 create mode 100644 llvm/test/CodeGen/Mips/fp-maximum-minimum.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 29b0bb38ed5b5..e0821d6ed64e0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8729,7 +8729,7 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
   return SDValue();
 }
 
-static SDValue determineFloatSign(SDValue N, SelectionDAG &DAG, bool Postive) {
+static SDValue determineFloatSign(SDValue N, SelectionDAG &DAG, bool Positive) {
   SDLoc DL(N);
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT VT = N->getValueType(0);
@@ -8748,7 +8748,7 @@ static SDValue determineFloatSign(SDValue N, SelectionDAG &DAG, bool Postive) {
   SDValue IntN = DAG.getNode(ISD::BITCAST, DL, IntVT, NTrunc);
 
   return DAG.getSetCC(DL, CCVT, IntN, DAG.getConstant(0, DL, IntVT),
-                      Postive ? ISD::SETGE : ISD::SETLT);
+                      Positive ? ISD::SETGE : ISD::SETLT);
 }
 
 SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
@@ -8799,6 +8799,9 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
     MinMax = DAG.getNode(MinMaxOpc, DL, VT, LHS, RHS, Flags);
     IsZeroOrdered = true;
   } else {
+    if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
+      return DAG.UnrollVectorOp(N);
+
     if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS))
       LHS = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, RHS, RHS, ISD::SETUO),
                           RHS, LHS, Flags);
diff --git a/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll
index b8962fa29e8f1..81865736ae9ee 100644
--- a/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll
+++ b/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll
@@ -9557,13 +9557,14 @@ define void @flat_atomic_fmaximum_f32_ret_a_a(ptr %ptr) #0 {
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v4, a0
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX90A-NEXT:  .LBB123_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_max_f32_e32 v2, v3, v4
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v3, v4
-; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX90A-NEXT:    v_cndmask_b32_e32 v5, v4, v2, vcc
+; GFX90A-NEXT:    v_max_f32_e32 v2, v2, v5
 ; GFX90A-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
@@ -9619,16 +9620,17 @@ define void @flat_atomic_fmaximum_f32_ret_av_av(ptr %ptr) #0 {
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    flat_load_dword v3, v[0:1] offset:40
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v4
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:  .LBB124_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_max_f32_e32 v2, v3, v4
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v3, v4
-; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX90A-NEXT:    v_cndmask_b32_e32 v5, v4, v2, vcc
+; GFX90A-NEXT:    v_max_f32_e32 v2, v2, v5
 ; GFX90A-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
@@ -9685,13 +9687,14 @@ define void @flat_atomic_fminimum_f32_ret_a_a(ptr %ptr) #0 {
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v4, a0
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX90A-NEXT:  .LBB125_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_min_f32_e32 v2, v3, v4
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v3, v4
-; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX90A-NEXT:    v_cndmask_b32_e32 v5, v4, v2, vcc
+; GFX90A-NEXT:    v_min_f32_e32 v2, v2, v5
 ; GFX90A-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
@@ -9747,16 +9750,17 @@ define void @flat_atomic_fminimum_f32_ret_av_av(ptr %ptr) #0 {
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    flat_load_dword v3, v[0:1] offset:40
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v4
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:  .LBB126_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_min_f32_e32 v2, v3, v4
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v3, v4
-; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX90A-NEXT:    v_cndmask_b32_e32 v5, v4, v2, vcc
+; GFX90A-NEXT:    v_min_f32_e32 v2, v2, v5
 ; GFX90A-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
@@ -10691,14 +10695,16 @@ define void @flat_atomic_fmaximum_f64_ret_a_a(ptr %ptr) #0 {
 ; GFX90A-NEXT:  ; %bb.1: ; %atomicrmw.global
 ; GFX90A-NEXT:    flat_load_dwordx2 v[2:3], v[6:7]
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
 ; GFX90A-NEXT:  .LBB135_2: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_max_f64 v[0:1], v[2:3], v[4:5]
-; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v9, v5, v1, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX90A-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
 ; GFX90A-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -10720,16 +10726,19 @@ define void @flat_atomic_fmaximum_f64_ret_a_a(ptr %ptr) #0 {
 ; GFX90A-NEXT:    v_cndmask_b32_e32 v6, -1, v6, vcc
 ; GFX90A-NEXT:    buffer_load_dword v0, v6, s[0:3], 0 offen
 ; GFX90A-NEXT:    buffer_load_dword v1, v6, s[0:3], 0 offen offset:4
-; GFX90A-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX90A-NEXT:    s_waitcnt vmcnt(1)
 ; GFX90A-NEXT:    v_accvgpr_write_b32 a0, v0
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_accvgpr_write_b32 a1, v1
-; GFX90A-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX90A-NEXT:    buffer_store_dword v2, v6, s[0:3], 0 offen
-; GFX90A-NEXT:    buffer_store_dword v3, v6, s[0:3], 0 offen offset:4
+; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX90A-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX90A-NEXT:    buffer_store_dword v0, v6, s[0:3], 0 offen
+; GFX90A-NEXT:    buffer_store_dword v1, v6, s[0:3], 0 offen offset:4
 ; GFX90A-NEXT:  .LBB135_6: ; %atomicrmw.phi
 ; GFX90A-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX90A-NEXT:    ;;#ASMSTART
@@ -10757,15 +10766,18 @@ define void @flat_atomic_fmaximum_f64_ret_a_a(ptr %ptr) #0 {
 ; GFX950-NEXT:  ; %bb.1: ; %atomicrmw.global
 ; GFX950-NEXT:    flat_load_dwordx2 v[2:3], v[6:7]
 ; GFX950-NEXT:    s_mov_b64 s[2:3], 0
-; GFX950-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
 ; GFX950-NEXT:  .LBB135_2: ; %atomicrmw.start
 ; GFX950-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_max_f64 v[0:1], v[2:3], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[4:5]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v5, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX950-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
 ; GFX950-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] sc0
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -10784,17 +10796,20 @@ define void @flat_atomic_fmaximum_f64_ret_a_a(ptr %ptr) #0 {
 ; GFX950-NEXT:    s_cbranch_execz .LBB135_6
 ; GFX950-NEXT:  ; %bb.5: ; %atomicrmw.private
 ; GFX950-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
-; GFX950-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    s_nop 1
 ; GFX950-NEXT:    v_cndmask_b32_e32 v6, -1, v6, vcc
 ; GFX950-NEXT:    scratch_load_dwordx2 v[0:1], v6, off
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
-; GFX950-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX950-NEXT:    v_accvgpr_write_b32 a0, v0
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v1, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX950-NEXT:    v_accvgpr_write_b32 a1, v1
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v5, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX950-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
 ; GFX950-NEXT:    scratch_store_dwordx2 v6, v[2:3], off
 ; GFX950-NEXT:  .LBB135_6: ; %atomicrmw.phi
 ; GFX950-NEXT:    s_or_b64 exec, exec, s[0:1]
@@ -10828,14 +10843,16 @@ define void @flat_atomic_fmaximum_f64_ret_av_av(ptr %ptr) #0 {
 ; GFX90A-NEXT:  ; %bb.1: ; %atomicrmw.global
 ; GFX90A-NEXT:    flat_load_dwordx2 v[2:3], v[6:7]
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
 ; GFX90A-NEXT:  .LBB136_2: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_max_f64 v[0:1], v[2:3], v[4:5]
-; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v9, v5, v1, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX90A-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
 ; GFX90A-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -10853,14 +10870,17 @@ define void @flat_atomic_fmaximum_f64_ret_av_av(ptr %ptr) #0 {
 ; GFX90A-NEXT:  ; %bb.5: ; %atomicrmw.private
 ; GFX90A-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
 ; GFX90A-NEXT:    v_cndmask_b32_e32 v6, -1, v6, vcc
-; GFX90A-NEXT:    buffer_load_dword v0, v6, s[0:3], 0 offen
 ; GFX90A-NEXT:    buffer_load_dword v1, v6, s[0:3], 0 offen offset:4
-; GFX90A-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
+; GFX90A-NEXT:    buffer_load_dword v0, v6, s[0:3], 0 offen
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX90A-NEXT:    s_waitcnt vmcnt(1)
+; GFX90A-NEXT:    v_cndmask_b32_e32 v3, v1, v5, vcc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX90A-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v5, v5, v3, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX90A-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
 ; GFX90A-NEXT:    buffer_store_dword v2, v6, s[0:3], 0 offen
 ; GFX90A-NEXT:    buffer_store_dword v3, v6, s[0:3], 0 offen offset:4
 ; GFX90A-NEXT:  .LBB136_6: ; %atomicrmw.phi
@@ -10888,15 +10908,18 @@ define void @flat_atomic_fmaximum_f64_ret_av_av(ptr %ptr) #0 {
 ; GFX950-NEXT:  ; %bb.1: ; %atomicrmw.global
 ; GFX950-NEXT:    flat_load_dwordx2 v[2:3], v[6:7]
 ; GFX950-NEXT:    s_mov_b64 s[2:3], 0
-; GFX950-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
 ; GFX950-NEXT:  .LBB136_2: ; %atomicrmw.start
 ; GFX950-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_max_f64 v[0:1], v[2:3], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[4:5]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v5, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX950-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
 ; GFX950-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] sc0
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -10913,16 +10936,19 @@ define void @flat_atomic_fmaximum_f64_ret_av_av(ptr %ptr) #0 {
 ; GFX950-NEXT:    s_cbranch_execz .LBB136_6
 ; GFX950-NEXT:  ; %bb.5: ; %atomicrmw.private
 ; GFX950-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
-; GFX950-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    s_nop 1
 ; GFX950-NEXT:    v_cndmask_b32_e32 v6, -1, v6, vcc
 ; GFX950-NEXT:    scratch_load_dwordx2 v[0:1], v6, off
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
-; GFX950-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v1, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v5, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX950-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
 ; GFX950-NEXT:    scratch_store_dwordx2 v6, v[2:3], off
 ; GFX950-NEXT:  .LBB136_6: ; %atomicrmw.phi
 ; GFX950-NEXT:    s_or_b64 exec, exec, s[0:1]
@@ -10958,14 +10984,16 @@ define void @flat_atomic_fminimum_f64_ret_a_a(ptr %ptr) #0 {
 ; GFX90A-NEXT:  ; %bb.1: ; %atomicrmw.global
 ; GFX90A-NEXT:    flat_load_dwordx2 v[2:3], v[6:7]
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
 ; GFX90A-NEXT:  .LBB137_2: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_min_f64 v[0:1], v[2:3], v[4:5]
-; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v9, v5, v1, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX90A-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
 ; GFX90A-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -10987,16 +11015,19 @@ define void @flat_atomic_fminimum_f64_ret_a_a(ptr %ptr) #0 {
 ; GFX90A-NEXT:    v_cndmask_b32_e32 v6, -1, v6, vcc
 ; GFX90A-NEXT:    buffer_load_dword v0, v6, s[0:3], 0 offen
 ; GFX90A-NEXT:    buffer_load_dword v1, v6, s[0:3], 0 offen offset:4
-; GFX90A-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX90A-NEXT:    s_waitcnt vmcnt(1)
 ; GFX90A-NEXT:    v_accvgpr_write_b32 a0, v0
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_accvgpr_write_b32 a1, v1
-; GFX90A-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX90A-NEXT:    buffer_store_dword v2, v6, s[0:3], 0 offen
-; GFX90A-NEXT:    buffer_store_dword v3, v6, s[0:3], 0 offen offset:4
+; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX90A-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX90A-NEXT:    buffer_store_dword v0, v6, s[0:3], 0 offen
+; GFX90A-NEXT:    buffer_store_dword v1, v6, s[0:3], 0 offen offset:4
 ; GFX90A-NEXT:  .LBB137_6: ; %atomicrmw.phi
 ; GFX90A-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX90A-NEXT:    ;;#ASMSTART
@@ -11024,15 +11055,18 @@ define void @flat_atomic_fminimum_f64_ret_a_a(ptr %ptr) #0 {
 ; GFX950-NEXT:  ; %bb.1: ; %atomicrmw.global
 ; GFX950-NEXT:    flat_load_dwordx2 v[2:3], v[6:7]
 ; GFX950-NEXT:    s_mov_b64 s[2:3], 0
-; GFX950-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
 ; GFX950-NEXT:  .LBB137_2: ; %atomicrmw.start
 ; GFX950-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_min_f64 v[0:1], v[2:3], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[4:5]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v5, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX950-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
 ; GFX950-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] sc0
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -11051,17 +11085,20 @@ define void @flat_atomic_fminimum_f64_ret_a_a(ptr %ptr) #0 {
 ; GFX950-NEXT:    s_cbranch_execz .LBB137_6
 ; GFX950-NEXT:  ; %bb.5: ; %atomicrmw.private
 ; GFX950-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
-; GFX950-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    s_nop 1
 ; GFX950-NEXT:    v_cndmask_b32_e32 v6, -1, v6, vcc
 ; GFX950-NEXT:    scratch_load_dwordx2 v[0:1], v6, off
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
-; GFX950-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX950-NEXT:    v_accvgpr_write_b32 a0, v0
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v1, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX950-NEXT:    v_accvgpr_write_b32 a1, v1
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v5, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX950-NEXT:    v_min_f64 v[2:3], v[2:3], v[4:5]
 ; GFX950-NEXT:    scratch_store_dwordx2 v6, v[2:3], off
 ; GFX950-NEXT:  .LBB137_6: ; %atomicrmw.phi
 ; GFX950-NEXT:    s_or_b64 exec, exec, s[0:1]
@@ -11095,14 +11132,16 @@ define void @flat_atomic_fminimum_f64_ret_av_av(ptr %ptr) #0 {
 ; GFX90A-NEXT:  ; %bb.1: ; %atomicrmw.global
 ; GFX90A-NEXT:    flat_load_dwordx2 v[2:3], v[6:7]
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
 ; GFX90A-NEXT:  .LBB138_2: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_min_f64 v[0:1], v[2:3], v[4:5]
-; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v9, v5, v1, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX90A-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
 ; GFX90A-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -11120,14 +11159,17 @@ define void @flat_atomic_fminimum_f64_ret_av_av(ptr %ptr) #0 {
 ; GFX90A-NEXT:  ; %bb.5: ; %atomicrmw.private
 ; GFX90A-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
 ; GFX90A-NEXT:    v_cndmask_b32_e32 v6, -1, v6, vcc
-; GFX90A-NEXT:    buffer_load_dword v0, v6, s[0:3], 0 offen
 ; GFX90A-NEXT:    buffer_load_dword v1, v6, s[0:3], 0 offen offset:4
-; GFX90A-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
+; GFX90A-NEXT:    buffer_load_dword v0, v6, s[0:3], 0 offen
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX90A-NEXT:    s_waitcnt vmcnt(1)
+; GFX90A-NEXT:    v_cndmask_b32_e32 v3, v1, v5, vcc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX90A-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v5, v5, v3, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX90A-NEXT:    v_min_f64 v[2:3], v[2:3], v[4:5]
 ; GFX90A-NEXT:    buffer_store_dword v2, v6, s[0:3], 0 offen
 ; GFX90A-NEXT:    buffer_store_dword v3, v6, s[0:3], 0 offen offset:4
 ; GFX90A-NEXT:  .LBB138_6: ; %atomicrmw.phi
@@ -11155,15 +11197,18 @@ define void @flat_atomic_fminimum_f64_ret_av_av(ptr %ptr) #0 {
 ; GFX950-NEXT:  ; %bb.1: ; %atomicrmw.global
 ; GFX950-NEXT:    flat_load_dwordx2 v[2:3], v[6:7]
 ; GFX950-NEXT:    s_mov_b64 s[2:3], 0
-; GFX950-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
 ; GFX950-NEXT:  .LBB138_2: ; %atomicrmw.start
 ; GFX950-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_min_f64 v[0:1], v[2:3], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[4:5]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v5, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX950-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
 ; GFX950-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] sc0
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -11180,16 +11225,19 @@ define void @flat_atomic_fminimum_f64_ret_av_av(ptr %ptr) #0 {
 ; GFX950-NEXT:    s_cbranch_execz .LBB138_6
 ; GFX950-NEXT:  ; %bb.5: ; %atomicrmw.private
 ; GFX950-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
-; GFX950-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    s_nop 1
 ; GFX950-NEXT:    v_cndmask_b32_e32 v6, -1, v6, vcc
 ; GFX950-NEXT:    scratch_load_dwordx2 v[0:1], v6, off
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
-; GFX950-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v1, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v5, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX950-NEXT:    v_min_f64 v[2:3], v[2:3], v[4:5]
 ; GFX950-NEXT:    scratch_store_dwordx2 v6, v[2:3], off
 ; GFX950-NEXT:  .LBB138_6: ; %atomicrmw.phi
 ; GFX950-NEXT:    s_or_b64 exec, exec, s[0:1]
@@ -11702,17 +11750,22 @@ define void @flat_atomic_fmaximum_v2f16_ret_a_a(ptr %ptr) #0 {
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v4, a0
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7e00
+; GFX90A-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
 ; GFX90A-NEXT:    s_mov_b32 s8, 0x5040100
 ; GFX90A-NEXT:  .LBB147_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_pk_max_f16 v2, v3, v4
-; GFX90A-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX90A-NEXT:    v_cmp_o_f16_e64 s[4:5], v3, v4
-; GFX90A-NEXT:    v_cndmask_b32_e64 v6, v5, v2, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_sdwa v2, v5, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX90A-NEXT:    v_perm_b32 v2, v2, v6, s8
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v4, v4
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v3, v4, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_sdwa v7, v3, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v2, v2
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX90A-NEXT:    v_cndmask_b32_e64 v6, v4, v2, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v5, v7, vcc
+; GFX90A-NEXT:    v_perm_b32 v2, v7, v2, s8
+; GFX90A-NEXT:    v_perm_b32 v6, v8, v6, s8
+; GFX90A-NEXT:    v_pk_max_f16 v2, v2, v6
 ; GFX90A-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
@@ -11767,21 +11820,26 @@ define void @flat_atomic_fmaximum_v2f16_ret_av_av(ptr %ptr) #0 {
 ; GFX90A:       ; %bb.0:
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    flat_load_dword v3, v[0:1] offset:40
-; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX90A-NEXT:    s_mov_b32 s8, 0x5040100
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v4
 ; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
+; GFX90A-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
+; GFX90A-NEXT:    s_mov_b32 s8, 0x5040100
 ; GFX90A-NEXT:  .LBB148_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_pk_max_f16 v2, v3, v4
-; GFX90A-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX90A-NEXT:    v_cmp_o_f16_e64 s[4:5], v3, v4
-; GFX90A-NEXT:    v_cndmask_b32_e64 v6, v5, v2, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_sdwa v2, v5, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX90A-NEXT:    v_perm_b32 v2, v2, v6, s8
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v4, v4
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v3, v4, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_sdwa v7, v3, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v2, v2
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX90A-NEXT:    v_cndmask_b32_e64 v6, v4, v2, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v5, v7, vcc
+; GFX90A-NEXT:    v_perm_b32 v2, v7, v2, s8
+; GFX90A-NEXT:    v_perm_b32 v6, v8, v6, s8
+; GFX90A-NEXT:    v_pk_max_f16 v2, v2, v6
 ; GFX90A-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
@@ -11838,17 +11896,22 @@ define void @flat_atomic_fminimum_v2f16_ret_a_a(ptr %ptr) #0 {
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v4, a0
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7e00
+; GFX90A-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
 ; GFX90A-NEXT:    s_mov_b32 s8, 0x5040100
 ; GFX90A-NEXT:  .LBB149_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_pk_min_f16 v2, v3, v4
-; GFX90A-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX90A-NEXT:    v_cmp_o_f16_e64 s[4:5], v3, v4
-; GFX90A-NEXT:    v_cndmask_b32_e64 v6, v5, v2, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_sdwa v2, v5, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX90A-NEXT:    v_perm_b32 v2, v2, v6, s8
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v4, v4
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v3, v4, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_sdwa v7, v3, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v2, v2
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX90A-NEXT:    v_cndmask_b32_e64 v6, v4, v2, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v5, v7, vcc
+; GFX90A-NEXT:    v_perm_b32 v2, v7, v2, s8
+; GFX90A-NEXT:    v_perm_b32 v6, v8, v6, s8
+; GFX90A-NEXT:    v_pk_min_f16 v2, v2, v6
 ; GFX90A-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
@@ -11903,21 +11966,26 @@ define void @flat_atomic_fminimum_v2f16_ret_av_av(ptr %ptr) #0 {
 ; GFX90A:       ; %bb.0:
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    flat_load_dword v3, v[0:1] offset:40
-; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX90A-NEXT:    s_mov_b32 s8, 0x5040100
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v4
 ; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
+; GFX90A-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
+; GFX90A-NEXT:    s_mov_b32 s8, 0x5040100
 ; GFX90A-NEXT:  .LBB150_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_pk_min_f16 v2, v3, v4
-; GFX90A-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX90A-NEXT:    v_cmp_o_f16_e64 s[4:5], v3, v4
-; GFX90A-NEXT:    v_cndmask_b32_e64 v6, v5, v2, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_sdwa v2, v5, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX90A-NEXT:    v_perm_b32 v2, v2, v6, s8
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v4, v4
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v3, v4, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_sdwa v7, v3, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v2, v2
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX90A-NEXT:    v_cndmask_b32_e64 v6, v4, v2, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v5, v7, vcc
+; GFX90A-NEXT:    v_perm_b32 v2, v7, v2, s8
+; GFX90A-NEXT:    v_perm_b32 v6, v8, v6, s8
+; GFX90A-NEXT:    v_pk_min_f16 v2, v2, v6
 ; GFX90A-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
@@ -12622,32 +12690,35 @@ define void @flat_atomic_fmaximum_v2bf16_ret_a_a(ptr %ptr) #0 {
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX90A-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v2
+; GFX90A-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
 ; GFX90A-NEXT:    s_mov_b32 s9, 0x7060302
 ; GFX90A-NEXT:  .LBB159_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX90A-NEXT:    v_and_b32_e32 v7, 0xffff0000, v3
-; GFX90A-NEXT:    v_max_f32_e32 v8, v2, v4
-; GFX90A-NEXT:    v_max_f32_e32 v9, v7, v6
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v7, v6
-; GFX90A-NEXT:    v_cmp_o_f32_e64 s[4:5], v2, v4
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v5, v8, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v5, v9, vcc
-; GFX90A-NEXT:    v_bfe_u32 v8, v2, 16, 1
-; GFX90A-NEXT:    v_bfe_u32 v10, v7, 16, 1
-; GFX90A-NEXT:    v_or_b32_e32 v9, 0x400000, v2
-; GFX90A-NEXT:    v_or_b32_e32 v11, 0x400000, v7
-; GFX90A-NEXT:    v_add3_u32 v8, v8, v2, s8
-; GFX90A-NEXT:    v_add3_u32 v10, v10, v7, s8
-; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v4, v4
+; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v2, v4, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v6, v5, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v4, v2, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v5, v6, vcc
+; GFX90A-NEXT:    v_max_f32_e32 v2, v2, v7
+; GFX90A-NEXT:    v_max_f32_e32 v6, v6, v8
+; GFX90A-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX90A-NEXT:    v_bfe_u32 v9, v6, 16, 1
+; GFX90A-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX90A-NEXT:    v_or_b32_e32 v10, 0x400000, v6
+; GFX90A-NEXT:    v_add3_u32 v7, v7, v2, s8
+; GFX90A-NEXT:    v_add3_u32 v9, v9, v6, s8
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
 ; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v2, v2
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v8, v9, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v10, v11, vcc
-; GFX90A-NEXT:    v_perm_b32 v2, v7, v2, s9
+; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
+; GFX90A-NEXT:    v_perm_b32 v2, v6, v2, s9
 ; GFX90A-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
@@ -12713,32 +12784,35 @@ define void @flat_atomic_fmaximum_v2bf16_ret_av_av(ptr %ptr) #0 {
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX90A-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v2
+; GFX90A-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
 ; GFX90A-NEXT:    s_mov_b32 s9, 0x7060302
 ; GFX90A-NEXT:  .LBB160_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX90A-NEXT:    v_and_b32_e32 v7, 0xffff0000, v3
-; GFX90A-NEXT:    v_max_f32_e32 v8, v2, v4
-; GFX90A-NEXT:    v_max_f32_e32 v9, v7, v6
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v7, v6
-; GFX90A-NEXT:    v_cmp_o_f32_e64 s[4:5], v2, v4
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v5, v8, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v5, v9, vcc
-; GFX90A-NEXT:    v_bfe_u32 v8, v2, 16, 1
-; GFX90A-NEXT:    v_bfe_u32 v10, v7, 16, 1
-; GFX90A-NEXT:    v_or_b32_e32 v9, 0x400000, v2
-; GFX90A-NEXT:    v_or_b32_e32 v11, 0x400000, v7
-; GFX90A-NEXT:    v_add3_u32 v8, v8, v2, s8
-; GFX90A-NEXT:    v_add3_u32 v10, v10, v7, s8
-; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v4, v4
+; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v2, v4, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v6, v5, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v4, v2, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v5, v6, vcc
+; GFX90A-NEXT:    v_max_f32_e32 v2, v2, v7
+; GFX90A-NEXT:    v_max_f32_e32 v6, v6, v8
+; GFX90A-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX90A-NEXT:    v_bfe_u32 v9, v6, 16, 1
+; GFX90A-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX90A-NEXT:    v_or_b32_e32 v10, 0x400000, v6
+; GFX90A-NEXT:    v_add3_u32 v7, v7, v2, s8
+; GFX90A-NEXT:    v_add3_u32 v9, v9, v6, s8
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
 ; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v2, v2
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v8, v9, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v10, v11, vcc
-; GFX90A-NEXT:    v_perm_b32 v2, v7, v2, s9
+; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
+; GFX90A-NEXT:    v_perm_b32 v2, v6, v2, s9
 ; GFX90A-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
@@ -12802,32 +12876,35 @@ define void @flat_atomic_fminimum_v2bf16_ret_a_a(ptr %ptr) #0 {
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX90A-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v2
+; GFX90A-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
 ; GFX90A-NEXT:    s_mov_b32 s9, 0x7060302
 ; GFX90A-NEXT:  .LBB161_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX90A-NEXT:    v_and_b32_e32 v7, 0xffff0000, v3
-; GFX90A-NEXT:    v_min_f32_e32 v8, v2, v4
-; GFX90A-NEXT:    v_min_f32_e32 v9, v7, v6
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v7, v6
-; GFX90A-NEXT:    v_cmp_o_f32_e64 s[4:5], v2, v4
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v5, v8, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v5, v9, vcc
-; GFX90A-NEXT:    v_bfe_u32 v8, v2, 16, 1
-; GFX90A-NEXT:    v_bfe_u32 v10, v7, 16, 1
-; GFX90A-NEXT:    v_or_b32_e32 v9, 0x400000, v2
-; GFX90A-NEXT:    v_or_b32_e32 v11, 0x400000, v7
-; GFX90A-NEXT:    v_add3_u32 v8, v8, v2, s8
-; GFX90A-NEXT:    v_add3_u32 v10, v10, v7, s8
-; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v4, v4
+; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v2, v4, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v6, v5, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v4, v2, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v5, v6, vcc
+; GFX90A-NEXT:    v_min_f32_e32 v2, v2, v7
+; GFX90A-NEXT:    v_min_f32_e32 v6, v6, v8
+; GFX90A-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX90A-NEXT:    v_bfe_u32 v9, v6, 16, 1
+; GFX90A-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX90A-NEXT:    v_or_b32_e32 v10, 0x400000, v6
+; GFX90A-NEXT:    v_add3_u32 v7, v7, v2, s8
+; GFX90A-NEXT:    v_add3_u32 v9, v9, v6, s8
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
 ; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v2, v2
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v8, v9, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v10, v11, vcc
-; GFX90A-NEXT:    v_perm_b32 v2, v7, v2, s9
+; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
+; GFX90A-NEXT:    v_perm_b32 v2, v6, v2, s9
 ; GFX90A-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
@@ -12893,32 +12970,35 @@ define void @flat_atomic_fminimum_v2bf16_ret_av_av(ptr %ptr) #0 {
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX90A-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v2
+; GFX90A-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
 ; GFX90A-NEXT:    s_mov_b32 s9, 0x7060302
 ; GFX90A-NEXT:  .LBB162_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX90A-NEXT:    v_and_b32_e32 v7, 0xffff0000, v3
-; GFX90A-NEXT:    v_min_f32_e32 v8, v2, v4
-; GFX90A-NEXT:    v_min_f32_e32 v9, v7, v6
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v7, v6
-; GFX90A-NEXT:    v_cmp_o_f32_e64 s[4:5], v2, v4
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v5, v8, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v5, v9, vcc
-; GFX90A-NEXT:    v_bfe_u32 v8, v2, 16, 1
-; GFX90A-NEXT:    v_bfe_u32 v10, v7, 16, 1
-; GFX90A-NEXT:    v_or_b32_e32 v9, 0x400000, v2
-; GFX90A-NEXT:    v_or_b32_e32 v11, 0x400000, v7
-; GFX90A-NEXT:    v_add3_u32 v8, v8, v2, s8
-; GFX90A-NEXT:    v_add3_u32 v10, v10, v7, s8
-; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v4, v4
+; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v2, v4, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v6, v5, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v4, v2, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v5, v6, vcc
+; GFX90A-NEXT:    v_min_f32_e32 v2, v2, v7
+; GFX90A-NEXT:    v_min_f32_e32 v6, v6, v8
+; GFX90A-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX90A-NEXT:    v_bfe_u32 v9, v6, 16, 1
+; GFX90A-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX90A-NEXT:    v_or_b32_e32 v10, 0x400000, v6
+; GFX90A-NEXT:    v_add3_u32 v7, v7, v2, s8
+; GFX90A-NEXT:    v_add3_u32 v9, v9, v6, s8
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
 ; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v2, v2
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v8, v9, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v10, v11, vcc
-; GFX90A-NEXT:    v_perm_b32 v2, v7, v2, s9
+; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
+; GFX90A-NEXT:    v_perm_b32 v2, v6, v2, s9
 ; GFX90A-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
@@ -17872,14 +17952,15 @@ define void @flat_atomic_fmaximum_f32_saddr_ret_a_a(ptr inreg %ptr) #0 {
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v4, a0
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
 ; GFX90A-NEXT:  .LBB231_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_max_f32_e32 v0, v1, v4
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v1, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX90A-NEXT:    v_cndmask_b32_e32 v5, v4, v0, vcc
+; GFX90A-NEXT:    v_max_f32_e32 v0, v0, v5
 ; GFX90A-NEXT:    flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
@@ -17938,17 +18019,18 @@ define void @flat_atomic_fmaximum_f32_saddr_ret_av_av(ptr inreg %ptr) #0 {
 ; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[16:17], s[16:17] op_sel:[0,1]
 ; GFX90A-NEXT:    flat_load_dword v1, v[0:1] offset:40
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v4
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:  .LBB232_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_max_f32_e32 v0, v1, v4
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v1, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX90A-NEXT:    v_cndmask_b32_e32 v5, v4, v0, vcc
+; GFX90A-NEXT:    v_max_f32_e32 v0, v0, v5
 ; GFX90A-NEXT:    flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
@@ -18008,14 +18090,15 @@ define void @flat_atomic_fminimum_f32_saddr_ret_a_a(ptr inreg %ptr) #0 {
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v4, a0
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
 ; GFX90A-NEXT:  .LBB233_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_min_f32_e32 v0, v1, v4
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v1, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX90A-NEXT:    v_cndmask_b32_e32 v5, v4, v0, vcc
+; GFX90A-NEXT:    v_min_f32_e32 v0, v0, v5
 ; GFX90A-NEXT:    flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
@@ -18074,17 +18157,18 @@ define void @flat_atomic_fminimum_f32_saddr_ret_av_av(ptr inreg %ptr) #0 {
 ; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[16:17], s[16:17] op_sel:[0,1]
 ; GFX90A-NEXT:    flat_load_dword v1, v[0:1] offset:40
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v4
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:  .LBB234_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_min_f32_e32 v0, v1, v4
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v1, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX90A-NEXT:    v_cndmask_b32_e32 v5, v4, v0, vcc
+; GFX90A-NEXT:    v_min_f32_e32 v0, v0, v5
 ; GFX90A-NEXT:    flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
@@ -18983,14 +19067,16 @@ define void @flat_atomic_fmaximum_f64_saddr_ret_a_a(ptr inreg %ptr) #0 {
 ; GFX90A-NEXT:    v_pk_mov_b32 v[6:7], s[4:5], s[4:5] op_sel:[0,1]
 ; GFX90A-NEXT:    flat_load_dwordx2 v[2:3], v[6:7]
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
 ; GFX90A-NEXT:  .LBB243_2: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_max_f64 v[0:1], v[2:3], v[4:5]
-; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v9, v5, v1, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX90A-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
 ; GFX90A-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -19012,16 +19098,19 @@ define void @flat_atomic_fmaximum_f64_saddr_ret_a_a(ptr inreg %ptr) #0 {
 ; GFX90A-NEXT:    v_mov_b32_e32 v6, s4
 ; GFX90A-NEXT:    buffer_load_dword v0, v6, s[0:3], 0 offen
 ; GFX90A-NEXT:    buffer_load_dword v1, v6, s[0:3], 0 offen offset:4
-; GFX90A-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX90A-NEXT:    s_waitcnt vmcnt(1)
 ; GFX90A-NEXT:    v_accvgpr_write_b32 a0, v0
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_accvgpr_write_b32 a1, v1
-; GFX90A-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX90A-NEXT:    buffer_store_dword v2, v6, s[0:3], 0 offen
-; GFX90A-NEXT:    buffer_store_dword v3, v6, s[0:3], 0 offen offset:4
+; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX90A-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX90A-NEXT:    buffer_store_dword v0, v6, s[0:3], 0 offen
+; GFX90A-NEXT:    buffer_store_dword v1, v6, s[0:3], 0 offen offset:4
 ; GFX90A-NEXT:  .LBB243_6: ; %atomicrmw.phi
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; use a[0:1]
@@ -19048,15 +19137,18 @@ define void @flat_atomic_fmaximum_f64_saddr_ret_a_a(ptr inreg %ptr) #0 {
 ; GFX950-NEXT:    v_mov_b64_e32 v[6:7], s[0:1]
 ; GFX950-NEXT:    flat_load_dwordx2 v[2:3], v[6:7]
 ; GFX950-NEXT:    s_mov_b64 s[2:3], 0
-; GFX950-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
 ; GFX950-NEXT:  .LBB243_2: ; %atomicrmw.start
 ; GFX950-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_max_f64 v[0:1], v[2:3], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[4:5]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v5, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX950-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
 ; GFX950-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] sc0
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -19076,14 +19168,17 @@ define void @flat_atomic_fmaximum_f64_saddr_ret_a_a(ptr inreg %ptr) #0 {
 ; GFX950-NEXT:    s_cmp_lg_u64 s[0:1], 0
 ; GFX950-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX950-NEXT:    scratch_load_dwordx2 v[0:1], off, s0
-; GFX950-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
-; GFX950-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX950-NEXT:    v_accvgpr_write_b32 a0, v0
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v1, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX950-NEXT:    v_accvgpr_write_b32 a1, v1
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v5, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX950-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
 ; GFX950-NEXT:    scratch_store_dwordx2 off, v[2:3], s0
 ; GFX950-NEXT:  .LBB243_6: ; %atomicrmw.phi
 ; GFX950-NEXT:    ;;#ASMSTART
@@ -19116,14 +19211,16 @@ define void @flat_atomic_fmaximum_f64_saddr_ret_av_av(ptr inreg %ptr) #0 {
 ; GFX90A-NEXT:    v_pk_mov_b32 v[6:7], s[4:5], s[4:5] op_sel:[0,1]
 ; GFX90A-NEXT:    flat_load_dwordx2 v[2:3], v[6:7]
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
 ; GFX90A-NEXT:  .LBB244_2: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_max_f64 v[0:1], v[2:3], v[4:5]
-; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v9, v5, v1, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX90A-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
 ; GFX90A-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -19141,14 +19238,17 @@ define void @flat_atomic_fmaximum_f64_saddr_ret_av_av(ptr inreg %ptr) #0 {
 ; GFX90A-NEXT:    s_cmp_lg_u64 s[4:5], 0
 ; GFX90A-NEXT:    s_cselect_b32 s4, s4, -1
 ; GFX90A-NEXT:    v_mov_b32_e32 v6, s4
-; GFX90A-NEXT:    buffer_load_dword v0, v6, s[0:3], 0 offen
 ; GFX90A-NEXT:    buffer_load_dword v1, v6, s[0:3], 0 offen offset:4
-; GFX90A-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
+; GFX90A-NEXT:    buffer_load_dword v0, v6, s[0:3], 0 offen
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX90A-NEXT:    s_waitcnt vmcnt(1)
+; GFX90A-NEXT:    v_cndmask_b32_e32 v3, v1, v5, vcc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX90A-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v5, v5, v3, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX90A-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
 ; GFX90A-NEXT:    buffer_store_dword v2, v6, s[0:3], 0 offen
 ; GFX90A-NEXT:    buffer_store_dword v3, v6, s[0:3], 0 offen offset:4
 ; GFX90A-NEXT:  .LBB244_6: ; %atomicrmw.phi
@@ -19175,15 +19275,18 @@ define void @flat_atomic_fmaximum_f64_saddr_ret_av_av(ptr inreg %ptr) #0 {
 ; GFX950-NEXT:    v_mov_b64_e32 v[6:7], s[0:1]
 ; GFX950-NEXT:    flat_load_dwordx2 v[2:3], v[6:7]
 ; GFX950-NEXT:    s_mov_b64 s[2:3], 0
-; GFX950-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
 ; GFX950-NEXT:  .LBB244_2: ; %atomicrmw.start
 ; GFX950-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_max_f64 v[0:1], v[2:3], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[4:5]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v5, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX950-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
 ; GFX950-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] sc0
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -19201,13 +19304,16 @@ define void @flat_atomic_fmaximum_f64_saddr_ret_av_av(ptr inreg %ptr) #0 {
 ; GFX950-NEXT:    s_cmp_lg_u64 s[0:1], 0
 ; GFX950-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX950-NEXT:    scratch_load_dwordx2 v[0:1], off, s0
-; GFX950-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
-; GFX950-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v1, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v5, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX950-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
 ; GFX950-NEXT:    scratch_store_dwordx2 off, v[2:3], s0
 ; GFX950-NEXT:  .LBB244_6: ; %atomicrmw.phi
 ; GFX950-NEXT:    ;;#ASMSTART
@@ -19242,14 +19348,16 @@ define void @flat_atomic_fminimum_f64_saddr_ret_a_a(ptr inreg %ptr) #0 {
 ; GFX90A-NEXT:    v_pk_mov_b32 v[6:7], s[4:5], s[4:5] op_sel:[0,1]
 ; GFX90A-NEXT:    flat_load_dwordx2 v[2:3], v[6:7]
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
 ; GFX90A-NEXT:  .LBB245_2: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_min_f64 v[0:1], v[2:3], v[4:5]
-; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v9, v5, v1, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX90A-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
 ; GFX90A-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -19271,16 +19379,19 @@ define void @flat_atomic_fminimum_f64_saddr_ret_a_a(ptr inreg %ptr) #0 {
 ; GFX90A-NEXT:    v_mov_b32_e32 v6, s4
 ; GFX90A-NEXT:    buffer_load_dword v0, v6, s[0:3], 0 offen
 ; GFX90A-NEXT:    buffer_load_dword v1, v6, s[0:3], 0 offen offset:4
-; GFX90A-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX90A-NEXT:    s_waitcnt vmcnt(1)
 ; GFX90A-NEXT:    v_accvgpr_write_b32 a0, v0
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_accvgpr_write_b32 a1, v1
-; GFX90A-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX90A-NEXT:    buffer_store_dword v2, v6, s[0:3], 0 offen
-; GFX90A-NEXT:    buffer_store_dword v3, v6, s[0:3], 0 offen offset:4
+; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX90A-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX90A-NEXT:    buffer_store_dword v0, v6, s[0:3], 0 offen
+; GFX90A-NEXT:    buffer_store_dword v1, v6, s[0:3], 0 offen offset:4
 ; GFX90A-NEXT:  .LBB245_6: ; %atomicrmw.phi
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; use a[0:1]
@@ -19307,15 +19418,18 @@ define void @flat_atomic_fminimum_f64_saddr_ret_a_a(ptr inreg %ptr) #0 {
 ; GFX950-NEXT:    v_mov_b64_e32 v[6:7], s[0:1]
 ; GFX950-NEXT:    flat_load_dwordx2 v[2:3], v[6:7]
 ; GFX950-NEXT:    s_mov_b64 s[2:3], 0
-; GFX950-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
 ; GFX950-NEXT:  .LBB245_2: ; %atomicrmw.start
 ; GFX950-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_min_f64 v[0:1], v[2:3], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[4:5]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v5, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX950-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
 ; GFX950-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] sc0
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -19335,14 +19449,17 @@ define void @flat_atomic_fminimum_f64_saddr_ret_a_a(ptr inreg %ptr) #0 {
 ; GFX950-NEXT:    s_cmp_lg_u64 s[0:1], 0
 ; GFX950-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX950-NEXT:    scratch_load_dwordx2 v[0:1], off, s0
-; GFX950-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
-; GFX950-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX950-NEXT:    v_accvgpr_write_b32 a0, v0
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v1, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX950-NEXT:    v_accvgpr_write_b32 a1, v1
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v5, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX950-NEXT:    v_min_f64 v[2:3], v[2:3], v[4:5]
 ; GFX950-NEXT:    scratch_store_dwordx2 off, v[2:3], s0
 ; GFX950-NEXT:  .LBB245_6: ; %atomicrmw.phi
 ; GFX950-NEXT:    ;;#ASMSTART
@@ -19375,14 +19492,16 @@ define void @flat_atomic_fminimum_f64_saddr_ret_av_av(ptr inreg %ptr) #0 {
 ; GFX90A-NEXT:    v_pk_mov_b32 v[6:7], s[4:5], s[4:5] op_sel:[0,1]
 ; GFX90A-NEXT:    flat_load_dwordx2 v[2:3], v[6:7]
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
 ; GFX90A-NEXT:  .LBB246_2: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_min_f64 v[0:1], v[2:3], v[4:5]
-; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v9, v5, v1, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX90A-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
 ; GFX90A-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -19400,14 +19519,17 @@ define void @flat_atomic_fminimum_f64_saddr_ret_av_av(ptr inreg %ptr) #0 {
 ; GFX90A-NEXT:    s_cmp_lg_u64 s[4:5], 0
 ; GFX90A-NEXT:    s_cselect_b32 s4, s4, -1
 ; GFX90A-NEXT:    v_mov_b32_e32 v6, s4
-; GFX90A-NEXT:    buffer_load_dword v0, v6, s[0:3], 0 offen
 ; GFX90A-NEXT:    buffer_load_dword v1, v6, s[0:3], 0 offen offset:4
-; GFX90A-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
+; GFX90A-NEXT:    buffer_load_dword v0, v6, s[0:3], 0 offen
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX90A-NEXT:    s_waitcnt vmcnt(1)
+; GFX90A-NEXT:    v_cndmask_b32_e32 v3, v1, v5, vcc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX90A-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v5, v5, v3, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX90A-NEXT:    v_min_f64 v[2:3], v[2:3], v[4:5]
 ; GFX90A-NEXT:    buffer_store_dword v2, v6, s[0:3], 0 offen
 ; GFX90A-NEXT:    buffer_store_dword v3, v6, s[0:3], 0 offen offset:4
 ; GFX90A-NEXT:  .LBB246_6: ; %atomicrmw.phi
@@ -19434,15 +19556,18 @@ define void @flat_atomic_fminimum_f64_saddr_ret_av_av(ptr inreg %ptr) #0 {
 ; GFX950-NEXT:    v_mov_b64_e32 v[6:7], s[0:1]
 ; GFX950-NEXT:    flat_load_dwordx2 v[2:3], v[6:7]
 ; GFX950-NEXT:    s_mov_b64 s[2:3], 0
-; GFX950-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
 ; GFX950-NEXT:  .LBB246_2: ; %atomicrmw.start
 ; GFX950-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_min_f64 v[0:1], v[2:3], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[4:5]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v5, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX950-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
 ; GFX950-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] sc0
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -19460,13 +19585,16 @@ define void @flat_atomic_fminimum_f64_saddr_ret_av_av(ptr inreg %ptr) #0 {
 ; GFX950-NEXT:    s_cmp_lg_u64 s[0:1], 0
 ; GFX950-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX950-NEXT:    scratch_load_dwordx2 v[0:1], off, s0
-; GFX950-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
-; GFX950-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v1, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v5, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX950-NEXT:    v_min_f64 v[2:3], v[2:3], v[4:5]
 ; GFX950-NEXT:    scratch_store_dwordx2 off, v[2:3], s0
 ; GFX950-NEXT:  .LBB246_6: ; %atomicrmw.phi
 ; GFX950-NEXT:    ;;#ASMSTART
@@ -20011,18 +20139,23 @@ define void @flat_atomic_fmaximum_v2f16_saddr_ret_a_a(ptr inreg %ptr) #0 {
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v4, a0
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7e00
+; GFX90A-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
 ; GFX90A-NEXT:    s_mov_b32 s8, 0x5040100
 ; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
 ; GFX90A-NEXT:  .LBB255_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_pk_max_f16 v0, v1, v4
-; GFX90A-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX90A-NEXT:    v_cmp_o_f16_e64 s[4:5], v1, v4
-; GFX90A-NEXT:    v_cndmask_b32_e64 v6, v5, v0, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_sdwa v0, v5, v0, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX90A-NEXT:    v_perm_b32 v0, v0, v6, s8
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v4, v4
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v1, v4, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_sdwa v7, v1, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v0, v0
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX90A-NEXT:    v_cndmask_b32_e64 v6, v4, v0, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v5, v7, vcc
+; GFX90A-NEXT:    v_perm_b32 v0, v7, v0, s8
+; GFX90A-NEXT:    v_perm_b32 v6, v8, v6, s8
+; GFX90A-NEXT:    v_pk_max_f16 v0, v0, v6
 ; GFX90A-NEXT:    flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
@@ -20080,22 +20213,27 @@ define void @flat_atomic_fmaximum_v2f16_saddr_ret_av_av(ptr inreg %ptr) #0 {
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[16:17], s[16:17] op_sel:[0,1]
 ; GFX90A-NEXT:    flat_load_dword v1, v[0:1] offset:40
-; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX90A-NEXT:    s_mov_b32 s8, 0x5040100
-; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v4
 ; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
+; GFX90A-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
+; GFX90A-NEXT:    s_mov_b32 s8, 0x5040100
+; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
 ; GFX90A-NEXT:  .LBB256_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_pk_max_f16 v0, v1, v4
-; GFX90A-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX90A-NEXT:    v_cmp_o_f16_e64 s[4:5], v1, v4
-; GFX90A-NEXT:    v_cndmask_b32_e64 v6, v5, v0, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_sdwa v0, v5, v0, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX90A-NEXT:    v_perm_b32 v0, v0, v6, s8
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v4, v4
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v1, v4, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_sdwa v7, v1, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v0, v0
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX90A-NEXT:    v_cndmask_b32_e64 v6, v4, v0, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v5, v7, vcc
+; GFX90A-NEXT:    v_perm_b32 v0, v7, v0, s8
+; GFX90A-NEXT:    v_perm_b32 v6, v8, v6, s8
+; GFX90A-NEXT:    v_pk_max_f16 v0, v0, v6
 ; GFX90A-NEXT:    flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
@@ -20155,18 +20293,23 @@ define void @flat_atomic_fminimum_v2f16_saddr_ret_a_a(ptr inreg %ptr) #0 {
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v4, a0
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7e00
+; GFX90A-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
 ; GFX90A-NEXT:    s_mov_b32 s8, 0x5040100
 ; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
 ; GFX90A-NEXT:  .LBB257_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_pk_min_f16 v0, v1, v4
-; GFX90A-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX90A-NEXT:    v_cmp_o_f16_e64 s[4:5], v1, v4
-; GFX90A-NEXT:    v_cndmask_b32_e64 v6, v5, v0, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_sdwa v0, v5, v0, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX90A-NEXT:    v_perm_b32 v0, v0, v6, s8
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v4, v4
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v1, v4, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_sdwa v7, v1, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v0, v0
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX90A-NEXT:    v_cndmask_b32_e64 v6, v4, v0, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v5, v7, vcc
+; GFX90A-NEXT:    v_perm_b32 v0, v7, v0, s8
+; GFX90A-NEXT:    v_perm_b32 v6, v8, v6, s8
+; GFX90A-NEXT:    v_pk_min_f16 v0, v0, v6
 ; GFX90A-NEXT:    flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
@@ -20224,22 +20367,27 @@ define void @flat_atomic_fminimum_v2f16_saddr_ret_av_av(ptr inreg %ptr) #0 {
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[16:17], s[16:17] op_sel:[0,1]
 ; GFX90A-NEXT:    flat_load_dword v1, v[0:1] offset:40
-; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX90A-NEXT:    s_mov_b32 s8, 0x5040100
-; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v4
 ; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
+; GFX90A-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
+; GFX90A-NEXT:    s_mov_b32 s8, 0x5040100
+; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
 ; GFX90A-NEXT:  .LBB258_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_pk_min_f16 v0, v1, v4
-; GFX90A-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX90A-NEXT:    v_cmp_o_f16_e64 s[4:5], v1, v4
-; GFX90A-NEXT:    v_cndmask_b32_e64 v6, v5, v0, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_sdwa v0, v5, v0, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX90A-NEXT:    v_perm_b32 v0, v0, v6, s8
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v4, v4
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v1, v4, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_sdwa v7, v1, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v0, v0
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX90A-NEXT:    v_cndmask_b32_e64 v6, v4, v0, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v5, v7, vcc
+; GFX90A-NEXT:    v_perm_b32 v0, v7, v0, s8
+; GFX90A-NEXT:    v_perm_b32 v6, v8, v6, s8
+; GFX90A-NEXT:    v_pk_min_f16 v0, v0, v6
 ; GFX90A-NEXT:    flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
@@ -20979,33 +21127,36 @@ define void @flat_atomic_fmaximum_v2bf16_saddr_ret_a_a(ptr inreg %ptr) #0 {
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v0, a0
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v4, 16, v0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX90A-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v0
+; GFX90A-NEXT:    v_and_b32_e32 v5, 0xffff0000, v0
 ; GFX90A-NEXT:    s_mov_b32 s9, 0x7060302
 ; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
 ; GFX90A-NEXT:  .LBB267_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
-; GFX90A-NEXT:    v_and_b32_e32 v7, 0xffff0000, v1
-; GFX90A-NEXT:    v_max_f32_e32 v8, v0, v4
-; GFX90A-NEXT:    v_max_f32_e32 v9, v7, v6
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v7, v6
-; GFX90A-NEXT:    v_cmp_o_f32_e64 s[4:5], v0, v4
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v5, v8, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v5, v9, vcc
-; GFX90A-NEXT:    v_bfe_u32 v8, v0, 16, 1
-; GFX90A-NEXT:    v_bfe_u32 v10, v7, 16, 1
-; GFX90A-NEXT:    v_or_b32_e32 v9, 0x400000, v0
-; GFX90A-NEXT:    v_or_b32_e32 v11, 0x400000, v7
-; GFX90A-NEXT:    v_add3_u32 v8, v8, v0, s8
-; GFX90A-NEXT:    v_add3_u32 v10, v10, v7, s8
-; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v4, v4
+; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v1
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v6, v5, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v4, v0, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v5, v6, vcc
+; GFX90A-NEXT:    v_max_f32_e32 v0, v0, v7
+; GFX90A-NEXT:    v_max_f32_e32 v6, v6, v8
+; GFX90A-NEXT:    v_bfe_u32 v7, v0, 16, 1
+; GFX90A-NEXT:    v_bfe_u32 v9, v6, 16, 1
+; GFX90A-NEXT:    v_or_b32_e32 v8, 0x400000, v0
+; GFX90A-NEXT:    v_or_b32_e32 v10, 0x400000, v6
+; GFX90A-NEXT:    v_add3_u32 v7, v7, v0, s8
+; GFX90A-NEXT:    v_add3_u32 v9, v9, v6, s8
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
 ; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v8, v9, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v10, v11, vcc
-; GFX90A-NEXT:    v_perm_b32 v0, v7, v0, s9
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v7, v8, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
+; GFX90A-NEXT:    v_perm_b32 v0, v6, v0, s9
 ; GFX90A-NEXT:    flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
@@ -21074,33 +21225,36 @@ define void @flat_atomic_fmaximum_v2bf16_saddr_ret_av_av(ptr inreg %ptr) #0 {
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v4, 16, v0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX90A-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v0
+; GFX90A-NEXT:    v_and_b32_e32 v5, 0xffff0000, v0
 ; GFX90A-NEXT:    s_mov_b32 s9, 0x7060302
 ; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
 ; GFX90A-NEXT:  .LBB268_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
-; GFX90A-NEXT:    v_and_b32_e32 v7, 0xffff0000, v1
-; GFX90A-NEXT:    v_max_f32_e32 v8, v0, v4
-; GFX90A-NEXT:    v_max_f32_e32 v9, v7, v6
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v7, v6
-; GFX90A-NEXT:    v_cmp_o_f32_e64 s[4:5], v0, v4
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v5, v8, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v5, v9, vcc
-; GFX90A-NEXT:    v_bfe_u32 v8, v0, 16, 1
-; GFX90A-NEXT:    v_bfe_u32 v10, v7, 16, 1
-; GFX90A-NEXT:    v_or_b32_e32 v9, 0x400000, v0
-; GFX90A-NEXT:    v_or_b32_e32 v11, 0x400000, v7
-; GFX90A-NEXT:    v_add3_u32 v8, v8, v0, s8
-; GFX90A-NEXT:    v_add3_u32 v10, v10, v7, s8
-; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v4, v4
+; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v1
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v6, v5, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v4, v0, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v5, v6, vcc
+; GFX90A-NEXT:    v_max_f32_e32 v0, v0, v7
+; GFX90A-NEXT:    v_max_f32_e32 v6, v6, v8
+; GFX90A-NEXT:    v_bfe_u32 v7, v0, 16, 1
+; GFX90A-NEXT:    v_bfe_u32 v9, v6, 16, 1
+; GFX90A-NEXT:    v_or_b32_e32 v8, 0x400000, v0
+; GFX90A-NEXT:    v_or_b32_e32 v10, 0x400000, v6
+; GFX90A-NEXT:    v_add3_u32 v7, v7, v0, s8
+; GFX90A-NEXT:    v_add3_u32 v9, v9, v6, s8
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
 ; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v8, v9, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v10, v11, vcc
-; GFX90A-NEXT:    v_perm_b32 v0, v7, v0, s9
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v7, v8, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
+; GFX90A-NEXT:    v_perm_b32 v0, v6, v0, s9
 ; GFX90A-NEXT:    flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
@@ -21167,33 +21321,36 @@ define void @flat_atomic_fminimum_v2bf16_saddr_ret_a_a(ptr inreg %ptr) #0 {
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v0, a0
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v4, 16, v0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX90A-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v0
+; GFX90A-NEXT:    v_and_b32_e32 v5, 0xffff0000, v0
 ; GFX90A-NEXT:    s_mov_b32 s9, 0x7060302
 ; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
 ; GFX90A-NEXT:  .LBB269_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
-; GFX90A-NEXT:    v_and_b32_e32 v7, 0xffff0000, v1
-; GFX90A-NEXT:    v_min_f32_e32 v8, v0, v4
-; GFX90A-NEXT:    v_min_f32_e32 v9, v7, v6
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v7, v6
-; GFX90A-NEXT:    v_cmp_o_f32_e64 s[4:5], v0, v4
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v5, v8, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v5, v9, vcc
-; GFX90A-NEXT:    v_bfe_u32 v8, v0, 16, 1
-; GFX90A-NEXT:    v_bfe_u32 v10, v7, 16, 1
-; GFX90A-NEXT:    v_or_b32_e32 v9, 0x400000, v0
-; GFX90A-NEXT:    v_or_b32_e32 v11, 0x400000, v7
-; GFX90A-NEXT:    v_add3_u32 v8, v8, v0, s8
-; GFX90A-NEXT:    v_add3_u32 v10, v10, v7, s8
-; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v4, v4
+; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v1
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v6, v5, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v4, v0, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v5, v6, vcc
+; GFX90A-NEXT:    v_min_f32_e32 v0, v0, v7
+; GFX90A-NEXT:    v_min_f32_e32 v6, v6, v8
+; GFX90A-NEXT:    v_bfe_u32 v7, v0, 16, 1
+; GFX90A-NEXT:    v_bfe_u32 v9, v6, 16, 1
+; GFX90A-NEXT:    v_or_b32_e32 v8, 0x400000, v0
+; GFX90A-NEXT:    v_or_b32_e32 v10, 0x400000, v6
+; GFX90A-NEXT:    v_add3_u32 v7, v7, v0, s8
+; GFX90A-NEXT:    v_add3_u32 v9, v9, v6, s8
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
 ; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v8, v9, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v10, v11, vcc
-; GFX90A-NEXT:    v_perm_b32 v0, v7, v0, s9
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v7, v8, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
+; GFX90A-NEXT:    v_perm_b32 v0, v6, v0, s9
 ; GFX90A-NEXT:    flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
@@ -21262,33 +21419,36 @@ define void @flat_atomic_fminimum_v2bf16_saddr_ret_av_av(ptr inreg %ptr) #0 {
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v4, 16, v0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX90A-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v0
+; GFX90A-NEXT:    v_and_b32_e32 v5, 0xffff0000, v0
 ; GFX90A-NEXT:    s_mov_b32 s9, 0x7060302
 ; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
 ; GFX90A-NEXT:  .LBB270_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
-; GFX90A-NEXT:    v_and_b32_e32 v7, 0xffff0000, v1
-; GFX90A-NEXT:    v_min_f32_e32 v8, v0, v4
-; GFX90A-NEXT:    v_min_f32_e32 v9, v7, v6
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v7, v6
-; GFX90A-NEXT:    v_cmp_o_f32_e64 s[4:5], v0, v4
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v5, v8, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v5, v9, vcc
-; GFX90A-NEXT:    v_bfe_u32 v8, v0, 16, 1
-; GFX90A-NEXT:    v_bfe_u32 v10, v7, 16, 1
-; GFX90A-NEXT:    v_or_b32_e32 v9, 0x400000, v0
-; GFX90A-NEXT:    v_or_b32_e32 v11, 0x400000, v7
-; GFX90A-NEXT:    v_add3_u32 v8, v8, v0, s8
-; GFX90A-NEXT:    v_add3_u32 v10, v10, v7, s8
-; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v4, v4
+; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v1
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v6, v5, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v4, v0, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v5, v6, vcc
+; GFX90A-NEXT:    v_min_f32_e32 v0, v0, v7
+; GFX90A-NEXT:    v_min_f32_e32 v6, v6, v8
+; GFX90A-NEXT:    v_bfe_u32 v7, v0, 16, 1
+; GFX90A-NEXT:    v_bfe_u32 v9, v6, 16, 1
+; GFX90A-NEXT:    v_or_b32_e32 v8, 0x400000, v0
+; GFX90A-NEXT:    v_or_b32_e32 v10, 0x400000, v6
+; GFX90A-NEXT:    v_add3_u32 v7, v7, v0, s8
+; GFX90A-NEXT:    v_add3_u32 v9, v9, v6, s8
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
 ; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v8, v9, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v10, v11, vcc
-; GFX90A-NEXT:    v_perm_b32 v0, v7, v0, s9
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v7, v8, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
+; GFX90A-NEXT:    v_perm_b32 v0, v6, v0, s9
 ; GFX90A-NEXT:    flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
diff --git a/llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll
index b6fe0c756a106..83627ce1e1477 100644
--- a/llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll
+++ b/llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll
@@ -6403,13 +6403,14 @@ define void @global_atomic_fmaximum_f32_ret_a_a(ptr addrspace(1) %ptr) #0 {
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v4, a0
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX90A-NEXT:  .LBB123_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_max_f32_e32 v2, v3, v4
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v3, v4
-; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX90A-NEXT:    v_cndmask_b32_e32 v5, v4, v2, vcc
+; GFX90A-NEXT:    v_max_f32_e32 v2, v2, v5
 ; GFX90A-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
@@ -6465,16 +6466,17 @@ define void @global_atomic_fmaximum_f32_ret_av_av(ptr addrspace(1) %ptr) #0 {
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    global_load_dword v3, v[0:1], off offset:40
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v4
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:  .LBB124_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_max_f32_e32 v2, v3, v4
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v3, v4
-; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX90A-NEXT:    v_cndmask_b32_e32 v5, v4, v2, vcc
+; GFX90A-NEXT:    v_max_f32_e32 v2, v2, v5
 ; GFX90A-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
@@ -6531,13 +6533,14 @@ define void @global_atomic_fminimum_f32_ret_a_a(ptr addrspace(1) %ptr) #0 {
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v4, a0
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX90A-NEXT:  .LBB125_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_min_f32_e32 v2, v3, v4
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v3, v4
-; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX90A-NEXT:    v_cndmask_b32_e32 v5, v4, v2, vcc
+; GFX90A-NEXT:    v_min_f32_e32 v2, v2, v5
 ; GFX90A-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
@@ -6593,16 +6596,17 @@ define void @global_atomic_fminimum_f32_ret_av_av(ptr addrspace(1) %ptr) #0 {
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    global_load_dword v3, v[0:1], off offset:40
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v4
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:  .LBB126_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_min_f32_e32 v2, v3, v4
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v3, v4
-; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX90A-NEXT:    v_cndmask_b32_e32 v5, v4, v2, vcc
+; GFX90A-NEXT:    v_min_f32_e32 v2, v2, v5
 ; GFX90A-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
@@ -7015,14 +7019,16 @@ define void @global_atomic_fmaximum_f64_ret_a_a(ptr addrspace(1) %ptr) #0 {
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v7, a1
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v6, a0
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
 ; GFX90A-NEXT:  .LBB135_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_max_f64 v[2:3], v[4:5], v[6:7]
-; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[6:7]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e32 v3, v5, v7, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v9, v7, v3, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v6, v2, vcc
+; GFX90A-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
 ; GFX90A-NEXT:    global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off offset:80 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
@@ -7049,15 +7055,18 @@ define void @global_atomic_fmaximum_f64_ret_a_a(ptr addrspace(1) %ptr) #0 {
 ; GFX950-NEXT:    s_mov_b64 s[0:1], 0
 ; GFX950-NEXT:    v_accvgpr_read_b32 v7, a1
 ; GFX950-NEXT:    v_accvgpr_read_b32 v6, a0
-; GFX950-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
 ; GFX950-NEXT:  .LBB135_1: ; %atomicrmw.start
 ; GFX950-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
-; GFX950-NEXT:    v_max_f64 v[2:3], v[4:5], v[6:7]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[6:7]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v5, v7, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v7, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v6, v2, vcc
+; GFX950-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
 ; GFX950-NEXT:    global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off offset:80 sc0
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
 ; GFX950-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
@@ -7086,17 +7095,19 @@ define void @global_atomic_fmaximum_f64_ret_av_av(ptr addrspace(1) %ptr) #0 {
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    global_load_dwordx2 v[4:5], v[0:1], off offset:80
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v[6:7]
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:  .LBB136_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_max_f64 v[2:3], v[4:5], v[6:7]
-; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[6:7]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e32 v3, v5, v7, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v9, v7, v3, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v6, v2, vcc
+; GFX90A-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
 ; GFX90A-NEXT:    global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off offset:80 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
@@ -7116,18 +7127,22 @@ define void @global_atomic_fmaximum_f64_ret_av_av(ptr addrspace(1) %ptr) #0 {
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    global_load_dwordx2 v[4:5], v[0:1], off offset:80
 ; GFX950-NEXT:    s_mov_b64 s[0:1], 0
-; GFX950-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
 ; GFX950-NEXT:    ;;#ASMSTART
 ; GFX950-NEXT:    ; def v[6:7]
 ; GFX950-NEXT:    ;;#ASMEND
 ; GFX950-NEXT:  .LBB136_1: ; %atomicrmw.start
 ; GFX950-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
-; GFX950-NEXT:    v_max_f64 v[2:3], v[4:5], v[6:7]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[6:7]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v5, v7, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v7, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v6, v2, vcc
+; GFX950-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
 ; GFX950-NEXT:    global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off offset:80 sc0
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
 ; GFX950-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
@@ -7159,14 +7174,16 @@ define void @global_atomic_fminimum_f64_ret_a_a(ptr addrspace(1) %ptr) #0 {
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v7, a1
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v6, a0
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
 ; GFX90A-NEXT:  .LBB137_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_min_f64 v[2:3], v[4:5], v[6:7]
-; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[6:7]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e32 v3, v5, v7, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v9, v7, v3, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v6, v2, vcc
+; GFX90A-NEXT:    v_min_f64 v[2:3], v[2:3], v[8:9]
 ; GFX90A-NEXT:    global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off offset:80 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
@@ -7193,15 +7210,18 @@ define void @global_atomic_fminimum_f64_ret_a_a(ptr addrspace(1) %ptr) #0 {
 ; GFX950-NEXT:    s_mov_b64 s[0:1], 0
 ; GFX950-NEXT:    v_accvgpr_read_b32 v7, a1
 ; GFX950-NEXT:    v_accvgpr_read_b32 v6, a0
-; GFX950-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
 ; GFX950-NEXT:  .LBB137_1: ; %atomicrmw.start
 ; GFX950-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
-; GFX950-NEXT:    v_min_f64 v[2:3], v[4:5], v[6:7]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[6:7]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v5, v7, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v7, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v6, v2, vcc
+; GFX950-NEXT:    v_min_f64 v[2:3], v[2:3], v[8:9]
 ; GFX950-NEXT:    global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off offset:80 sc0
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
 ; GFX950-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
@@ -7230,17 +7250,19 @@ define void @global_atomic_fminimum_f64_ret_av_av(ptr addrspace(1) %ptr) #0 {
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    global_load_dwordx2 v[4:5], v[0:1], off offset:80
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v[6:7]
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:  .LBB138_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_min_f64 v[2:3], v[4:5], v[6:7]
-; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[6:7]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e32 v3, v5, v7, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v9, v7, v3, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v6, v2, vcc
+; GFX90A-NEXT:    v_min_f64 v[2:3], v[2:3], v[8:9]
 ; GFX90A-NEXT:    global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off offset:80 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
@@ -7260,18 +7282,22 @@ define void @global_atomic_fminimum_f64_ret_av_av(ptr addrspace(1) %ptr) #0 {
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    global_load_dwordx2 v[4:5], v[0:1], off offset:80
 ; GFX950-NEXT:    s_mov_b64 s[0:1], 0
-; GFX950-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
 ; GFX950-NEXT:    ;;#ASMSTART
 ; GFX950-NEXT:    ; def v[6:7]
 ; GFX950-NEXT:    ;;#ASMEND
 ; GFX950-NEXT:  .LBB138_1: ; %atomicrmw.start
 ; GFX950-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
-; GFX950-NEXT:    v_min_f64 v[2:3], v[4:5], v[6:7]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[6:7]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v5, v7, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v7, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v6, v2, vcc
+; GFX950-NEXT:    v_min_f64 v[2:3], v[2:3], v[8:9]
 ; GFX950-NEXT:    global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off offset:80 sc0
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
 ; GFX950-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
@@ -7763,17 +7789,22 @@ define void @global_atomic_fmaximum_v2f16_ret_a_a(ptr addrspace(1) %ptr) #0 {
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v4, a0
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7e00
+; GFX90A-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
 ; GFX90A-NEXT:    s_mov_b32 s8, 0x5040100
 ; GFX90A-NEXT:  .LBB147_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_pk_max_f16 v2, v3, v4
-; GFX90A-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX90A-NEXT:    v_cmp_o_f16_e64 s[4:5], v3, v4
-; GFX90A-NEXT:    v_cndmask_b32_e64 v6, v5, v2, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_sdwa v2, v5, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX90A-NEXT:    v_perm_b32 v2, v2, v6, s8
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v4, v4
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v3, v4, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_sdwa v7, v3, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v2, v2
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX90A-NEXT:    v_cndmask_b32_e64 v6, v4, v2, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v5, v7, vcc
+; GFX90A-NEXT:    v_perm_b32 v2, v7, v2, s8
+; GFX90A-NEXT:    v_perm_b32 v6, v8, v6, s8
+; GFX90A-NEXT:    v_pk_max_f16 v2, v2, v6
 ; GFX90A-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
@@ -7828,21 +7859,26 @@ define void @global_atomic_fmaximum_v2f16_ret_av_av(ptr addrspace(1) %ptr) #0 {
 ; GFX90A:       ; %bb.0:
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    global_load_dword v3, v[0:1], off offset:40
-; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX90A-NEXT:    s_mov_b32 s8, 0x5040100
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v4
 ; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
+; GFX90A-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
+; GFX90A-NEXT:    s_mov_b32 s8, 0x5040100
 ; GFX90A-NEXT:  .LBB148_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_pk_max_f16 v2, v3, v4
-; GFX90A-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX90A-NEXT:    v_cmp_o_f16_e64 s[4:5], v3, v4
-; GFX90A-NEXT:    v_cndmask_b32_e64 v6, v5, v2, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_sdwa v2, v5, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX90A-NEXT:    v_perm_b32 v2, v2, v6, s8
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v4, v4
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v3, v4, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_sdwa v7, v3, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v2, v2
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX90A-NEXT:    v_cndmask_b32_e64 v6, v4, v2, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v5, v7, vcc
+; GFX90A-NEXT:    v_perm_b32 v2, v7, v2, s8
+; GFX90A-NEXT:    v_perm_b32 v6, v8, v6, s8
+; GFX90A-NEXT:    v_pk_max_f16 v2, v2, v6
 ; GFX90A-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
@@ -7899,17 +7935,22 @@ define void @global_atomic_fminimum_v2f16_ret_a_a(ptr addrspace(1) %ptr) #0 {
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v4, a0
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7e00
+; GFX90A-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
 ; GFX90A-NEXT:    s_mov_b32 s8, 0x5040100
 ; GFX90A-NEXT:  .LBB149_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_pk_min_f16 v2, v3, v4
-; GFX90A-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX90A-NEXT:    v_cmp_o_f16_e64 s[4:5], v3, v4
-; GFX90A-NEXT:    v_cndmask_b32_e64 v6, v5, v2, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_sdwa v2, v5, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX90A-NEXT:    v_perm_b32 v2, v2, v6, s8
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v4, v4
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v3, v4, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_sdwa v7, v3, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v2, v2
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX90A-NEXT:    v_cndmask_b32_e64 v6, v4, v2, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v5, v7, vcc
+; GFX90A-NEXT:    v_perm_b32 v2, v7, v2, s8
+; GFX90A-NEXT:    v_perm_b32 v6, v8, v6, s8
+; GFX90A-NEXT:    v_pk_min_f16 v2, v2, v6
 ; GFX90A-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
@@ -7964,21 +8005,26 @@ define void @global_atomic_fminimum_v2f16_ret_av_av(ptr addrspace(1) %ptr) #0 {
 ; GFX90A:       ; %bb.0:
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    global_load_dword v3, v[0:1], off offset:40
-; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX90A-NEXT:    s_mov_b32 s8, 0x5040100
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v4
 ; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
+; GFX90A-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
+; GFX90A-NEXT:    s_mov_b32 s8, 0x5040100
 ; GFX90A-NEXT:  .LBB150_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_pk_min_f16 v2, v3, v4
-; GFX90A-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX90A-NEXT:    v_cmp_o_f16_e64 s[4:5], v3, v4
-; GFX90A-NEXT:    v_cndmask_b32_e64 v6, v5, v2, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_sdwa v2, v5, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX90A-NEXT:    v_perm_b32 v2, v2, v6, s8
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v4, v4
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v3, v4, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_sdwa v7, v3, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v2, v2
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX90A-NEXT:    v_cndmask_b32_e64 v6, v4, v2, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v5, v7, vcc
+; GFX90A-NEXT:    v_perm_b32 v2, v7, v2, s8
+; GFX90A-NEXT:    v_perm_b32 v6, v8, v6, s8
+; GFX90A-NEXT:    v_pk_min_f16 v2, v2, v6
 ; GFX90A-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
@@ -8683,32 +8729,35 @@ define void @global_atomic_fmaximum_v2bf16_ret_a_a(ptr addrspace(1) %ptr) #0 {
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX90A-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v2
+; GFX90A-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
 ; GFX90A-NEXT:    s_mov_b32 s9, 0x7060302
 ; GFX90A-NEXT:  .LBB159_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX90A-NEXT:    v_and_b32_e32 v7, 0xffff0000, v3
-; GFX90A-NEXT:    v_max_f32_e32 v8, v2, v4
-; GFX90A-NEXT:    v_max_f32_e32 v9, v7, v6
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v7, v6
-; GFX90A-NEXT:    v_cmp_o_f32_e64 s[4:5], v2, v4
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v5, v8, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v5, v9, vcc
-; GFX90A-NEXT:    v_bfe_u32 v8, v2, 16, 1
-; GFX90A-NEXT:    v_bfe_u32 v10, v7, 16, 1
-; GFX90A-NEXT:    v_or_b32_e32 v9, 0x400000, v2
-; GFX90A-NEXT:    v_or_b32_e32 v11, 0x400000, v7
-; GFX90A-NEXT:    v_add3_u32 v8, v8, v2, s8
-; GFX90A-NEXT:    v_add3_u32 v10, v10, v7, s8
-; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v4, v4
+; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v2, v4, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v6, v5, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v4, v2, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v5, v6, vcc
+; GFX90A-NEXT:    v_max_f32_e32 v2, v2, v7
+; GFX90A-NEXT:    v_max_f32_e32 v6, v6, v8
+; GFX90A-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX90A-NEXT:    v_bfe_u32 v9, v6, 16, 1
+; GFX90A-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX90A-NEXT:    v_or_b32_e32 v10, 0x400000, v6
+; GFX90A-NEXT:    v_add3_u32 v7, v7, v2, s8
+; GFX90A-NEXT:    v_add3_u32 v9, v9, v6, s8
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
 ; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v2, v2
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v8, v9, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v10, v11, vcc
-; GFX90A-NEXT:    v_perm_b32 v2, v7, v2, s9
+; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
+; GFX90A-NEXT:    v_perm_b32 v2, v6, v2, s9
 ; GFX90A-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
@@ -8774,32 +8823,35 @@ define void @global_atomic_fmaximum_v2bf16_ret_av_av(ptr addrspace(1) %ptr) #0 {
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX90A-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v2
+; GFX90A-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
 ; GFX90A-NEXT:    s_mov_b32 s9, 0x7060302
 ; GFX90A-NEXT:  .LBB160_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX90A-NEXT:    v_and_b32_e32 v7, 0xffff0000, v3
-; GFX90A-NEXT:    v_max_f32_e32 v8, v2, v4
-; GFX90A-NEXT:    v_max_f32_e32 v9, v7, v6
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v7, v6
-; GFX90A-NEXT:    v_cmp_o_f32_e64 s[4:5], v2, v4
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v5, v8, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v5, v9, vcc
-; GFX90A-NEXT:    v_bfe_u32 v8, v2, 16, 1
-; GFX90A-NEXT:    v_bfe_u32 v10, v7, 16, 1
-; GFX90A-NEXT:    v_or_b32_e32 v9, 0x400000, v2
-; GFX90A-NEXT:    v_or_b32_e32 v11, 0x400000, v7
-; GFX90A-NEXT:    v_add3_u32 v8, v8, v2, s8
-; GFX90A-NEXT:    v_add3_u32 v10, v10, v7, s8
-; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v4, v4
+; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v2, v4, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v6, v5, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v4, v2, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v5, v6, vcc
+; GFX90A-NEXT:    v_max_f32_e32 v2, v2, v7
+; GFX90A-NEXT:    v_max_f32_e32 v6, v6, v8
+; GFX90A-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX90A-NEXT:    v_bfe_u32 v9, v6, 16, 1
+; GFX90A-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX90A-NEXT:    v_or_b32_e32 v10, 0x400000, v6
+; GFX90A-NEXT:    v_add3_u32 v7, v7, v2, s8
+; GFX90A-NEXT:    v_add3_u32 v9, v9, v6, s8
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
 ; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v2, v2
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v8, v9, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v10, v11, vcc
-; GFX90A-NEXT:    v_perm_b32 v2, v7, v2, s9
+; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
+; GFX90A-NEXT:    v_perm_b32 v2, v6, v2, s9
 ; GFX90A-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
@@ -8863,32 +8915,35 @@ define void @global_atomic_fminimum_v2bf16_ret_a_a(ptr addrspace(1) %ptr) #0 {
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX90A-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v2
+; GFX90A-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
 ; GFX90A-NEXT:    s_mov_b32 s9, 0x7060302
 ; GFX90A-NEXT:  .LBB161_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX90A-NEXT:    v_and_b32_e32 v7, 0xffff0000, v3
-; GFX90A-NEXT:    v_min_f32_e32 v8, v2, v4
-; GFX90A-NEXT:    v_min_f32_e32 v9, v7, v6
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v7, v6
-; GFX90A-NEXT:    v_cmp_o_f32_e64 s[4:5], v2, v4
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v5, v8, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v5, v9, vcc
-; GFX90A-NEXT:    v_bfe_u32 v8, v2, 16, 1
-; GFX90A-NEXT:    v_bfe_u32 v10, v7, 16, 1
-; GFX90A-NEXT:    v_or_b32_e32 v9, 0x400000, v2
-; GFX90A-NEXT:    v_or_b32_e32 v11, 0x400000, v7
-; GFX90A-NEXT:    v_add3_u32 v8, v8, v2, s8
-; GFX90A-NEXT:    v_add3_u32 v10, v10, v7, s8
-; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v4, v4
+; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v2, v4, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v6, v5, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v4, v2, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v5, v6, vcc
+; GFX90A-NEXT:    v_min_f32_e32 v2, v2, v7
+; GFX90A-NEXT:    v_min_f32_e32 v6, v6, v8
+; GFX90A-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX90A-NEXT:    v_bfe_u32 v9, v6, 16, 1
+; GFX90A-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX90A-NEXT:    v_or_b32_e32 v10, 0x400000, v6
+; GFX90A-NEXT:    v_add3_u32 v7, v7, v2, s8
+; GFX90A-NEXT:    v_add3_u32 v9, v9, v6, s8
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
 ; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v2, v2
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v8, v9, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v10, v11, vcc
-; GFX90A-NEXT:    v_perm_b32 v2, v7, v2, s9
+; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
+; GFX90A-NEXT:    v_perm_b32 v2, v6, v2, s9
 ; GFX90A-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
@@ -8954,32 +9009,35 @@ define void @global_atomic_fminimum_v2bf16_ret_av_av(ptr addrspace(1) %ptr) #0 {
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX90A-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX90A-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v2
+; GFX90A-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
 ; GFX90A-NEXT:    s_mov_b32 s9, 0x7060302
 ; GFX90A-NEXT:  .LBB162_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX90A-NEXT:    v_and_b32_e32 v7, 0xffff0000, v3
-; GFX90A-NEXT:    v_min_f32_e32 v8, v2, v4
-; GFX90A-NEXT:    v_min_f32_e32 v9, v7, v6
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v7, v6
-; GFX90A-NEXT:    v_cmp_o_f32_e64 s[4:5], v2, v4
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v5, v8, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v5, v9, vcc
-; GFX90A-NEXT:    v_bfe_u32 v8, v2, 16, 1
-; GFX90A-NEXT:    v_bfe_u32 v10, v7, 16, 1
-; GFX90A-NEXT:    v_or_b32_e32 v9, 0x400000, v2
-; GFX90A-NEXT:    v_or_b32_e32 v11, 0x400000, v7
-; GFX90A-NEXT:    v_add3_u32 v8, v8, v2, s8
-; GFX90A-NEXT:    v_add3_u32 v10, v10, v7, s8
-; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v4, v4
+; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v2, v4, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v6, v5, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v4, v2, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v5, v6, vcc
+; GFX90A-NEXT:    v_min_f32_e32 v2, v2, v7
+; GFX90A-NEXT:    v_min_f32_e32 v6, v6, v8
+; GFX90A-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX90A-NEXT:    v_bfe_u32 v9, v6, 16, 1
+; GFX90A-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX90A-NEXT:    v_or_b32_e32 v10, 0x400000, v6
+; GFX90A-NEXT:    v_add3_u32 v7, v7, v2, s8
+; GFX90A-NEXT:    v_add3_u32 v9, v9, v6, s8
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
 ; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v2, v2
-; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v8, v9, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v10, v11, vcc
-; GFX90A-NEXT:    v_perm_b32 v2, v7, v2, s9
+; GFX90A-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
+; GFX90A-NEXT:    v_perm_b32 v2, v6, v2, s9
 ; GFX90A-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
@@ -12188,13 +12246,14 @@ define void @global_atomic_fmaximum_f32_saddr_ret_a_a(ptr addrspace(1) inreg %pt
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v3, a0
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
 ; GFX90A-NEXT:  .LBB231_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_max_f32_e32 v0, v1, v3
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX90A-NEXT:    v_cndmask_b32_e32 v4, v3, v0, vcc
+; GFX90A-NEXT:    v_max_f32_e32 v0, v0, v4
 ; GFX90A-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[16:17] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
@@ -12252,16 +12311,17 @@ define void @global_atomic_fmaximum_f32_saddr_ret_av_av(ptr addrspace(1) inreg %
 ; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX90A-NEXT:    global_load_dword v1, v2, s[16:17] offset:40
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v3
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:  .LBB232_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_max_f32_e32 v0, v1, v3
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX90A-NEXT:    v_cndmask_b32_e32 v4, v3, v0, vcc
+; GFX90A-NEXT:    v_max_f32_e32 v0, v0, v4
 ; GFX90A-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[16:17] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
@@ -12320,13 +12380,14 @@ define void @global_atomic_fminimum_f32_saddr_ret_a_a(ptr addrspace(1) inreg %pt
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v3, a0
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
 ; GFX90A-NEXT:  .LBB233_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_min_f32_e32 v0, v1, v3
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX90A-NEXT:    v_cndmask_b32_e32 v4, v3, v0, vcc
+; GFX90A-NEXT:    v_min_f32_e32 v0, v0, v4
 ; GFX90A-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[16:17] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
@@ -12384,16 +12445,17 @@ define void @global_atomic_fminimum_f32_saddr_ret_av_av(ptr addrspace(1) inreg %
 ; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX90A-NEXT:    global_load_dword v1, v2, s[16:17] offset:40
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v3
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:  .LBB234_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_min_f32_e32 v0, v1, v3
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX90A-NEXT:    v_cndmask_b32_e32 v4, v3, v0, vcc
+; GFX90A-NEXT:    v_min_f32_e32 v0, v0, v4
 ; GFX90A-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[16:17] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
@@ -12821,14 +12883,16 @@ define void @global_atomic_fmaximum_f64_saddr_ret_a_a(ptr addrspace(1) inreg %pt
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v5, a1
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v4, a0
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
 ; GFX90A-NEXT:  .LBB243_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_max_f64 v[0:1], v[2:3], v[4:5]
-; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v9, v5, v1, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX90A-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
 ; GFX90A-NEXT:    global_atomic_cmpswap_x2 v[0:1], v6, v[0:3], s[16:17] offset:80 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -12856,15 +12920,18 @@ define void @global_atomic_fmaximum_f64_saddr_ret_a_a(ptr addrspace(1) inreg %pt
 ; GFX950-NEXT:    s_mov_b64 s[2:3], 0
 ; GFX950-NEXT:    v_accvgpr_read_b32 v5, a1
 ; GFX950-NEXT:    v_accvgpr_read_b32 v4, a0
-; GFX950-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
 ; GFX950-NEXT:  .LBB243_1: ; %atomicrmw.start
 ; GFX950-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
-; GFX950-NEXT:    v_max_f64 v[0:1], v[2:3], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[4:5]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v5, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX950-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
 ; GFX950-NEXT:    global_atomic_cmpswap_x2 v[0:1], v6, v[0:3], s[0:1] offset:80 sc0
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
 ; GFX950-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -12894,17 +12961,19 @@ define void @global_atomic_fmaximum_f64_saddr_ret_av_av(ptr addrspace(1) inreg %
 ; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX90A-NEXT:    global_load_dwordx2 v[2:3], v6, s[16:17] offset:80
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v[4:5]
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:  .LBB244_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_max_f64 v[0:1], v[2:3], v[4:5]
-; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v9, v5, v1, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX90A-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
 ; GFX90A-NEXT:    global_atomic_cmpswap_x2 v[0:1], v6, v[0:3], s[16:17] offset:80 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -12925,18 +12994,22 @@ define void @global_atomic_fmaximum_f64_saddr_ret_av_av(ptr addrspace(1) inreg %
 ; GFX950-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX950-NEXT:    global_load_dwordx2 v[2:3], v6, s[0:1] offset:80
 ; GFX950-NEXT:    s_mov_b64 s[2:3], 0
-; GFX950-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
 ; GFX950-NEXT:    ;;#ASMSTART
 ; GFX950-NEXT:    ; def v[4:5]
 ; GFX950-NEXT:    ;;#ASMEND
 ; GFX950-NEXT:  .LBB244_1: ; %atomicrmw.start
 ; GFX950-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
-; GFX950-NEXT:    v_max_f64 v[0:1], v[2:3], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[4:5]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v5, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX950-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
 ; GFX950-NEXT:    global_atomic_cmpswap_x2 v[0:1], v6, v[0:3], s[0:1] offset:80 sc0
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
 ; GFX950-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -12969,14 +13042,16 @@ define void @global_atomic_fminimum_f64_saddr_ret_a_a(ptr addrspace(1) inreg %pt
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v5, a1
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v4, a0
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
 ; GFX90A-NEXT:  .LBB245_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_min_f64 v[0:1], v[2:3], v[4:5]
-; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v9, v5, v1, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX90A-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
 ; GFX90A-NEXT:    global_atomic_cmpswap_x2 v[0:1], v6, v[0:3], s[16:17] offset:80 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -13004,15 +13079,18 @@ define void @global_atomic_fminimum_f64_saddr_ret_a_a(ptr addrspace(1) inreg %pt
 ; GFX950-NEXT:    s_mov_b64 s[2:3], 0
 ; GFX950-NEXT:    v_accvgpr_read_b32 v5, a1
 ; GFX950-NEXT:    v_accvgpr_read_b32 v4, a0
-; GFX950-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
 ; GFX950-NEXT:  .LBB245_1: ; %atomicrmw.start
 ; GFX950-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
-; GFX950-NEXT:    v_min_f64 v[0:1], v[2:3], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[4:5]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v5, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX950-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
 ; GFX950-NEXT:    global_atomic_cmpswap_x2 v[0:1], v6, v[0:3], s[0:1] offset:80 sc0
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
 ; GFX950-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -13042,17 +13120,19 @@ define void @global_atomic_fminimum_f64_saddr_ret_av_av(ptr addrspace(1) inreg %
 ; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX90A-NEXT:    global_load_dwordx2 v[2:3], v6, s[16:17] offset:80
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v[4:5]
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:  .LBB246_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_min_f64 v[0:1], v[2:3], v[4:5]
-; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v9, v5, v1, vcc
+; GFX90A-NEXT:    v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX90A-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
 ; GFX90A-NEXT:    global_atomic_cmpswap_x2 v[0:1], v6, v[0:3], s[16:17] offset:80 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -13073,18 +13153,22 @@ define void @global_atomic_fminimum_f64_saddr_ret_av_av(ptr addrspace(1) inreg %
 ; GFX950-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX950-NEXT:    global_load_dwordx2 v[2:3], v6, s[0:1] offset:80
 ; GFX950-NEXT:    s_mov_b64 s[2:3], 0
-; GFX950-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
 ; GFX950-NEXT:    ;;#ASMSTART
 ; GFX950-NEXT:    ; def v[4:5]
 ; GFX950-NEXT:    ;;#ASMEND
 ; GFX950-NEXT:  .LBB246_1: ; %atomicrmw.start
 ; GFX950-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
-; GFX950-NEXT:    v_min_f64 v[0:1], v[2:3], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[4:5]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v5, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX950-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
 ; GFX950-NEXT:    global_atomic_cmpswap_x2 v[0:1], v6, v[0:3], s[0:1] offset:80 sc0
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
 ; GFX950-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -13593,17 +13677,22 @@ define void @global_atomic_fmaximum_v2f16_saddr_ret_a_a(ptr addrspace(1) inreg %
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v3, a0
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v4, 0x7e00
+; GFX90A-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
 ; GFX90A-NEXT:    s_mov_b32 s8, 0x5040100
 ; GFX90A-NEXT:  .LBB255_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_pk_max_f16 v0, v1, v3
-; GFX90A-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX90A-NEXT:    v_cmp_o_f16_e64 s[4:5], v1, v3
-; GFX90A-NEXT:    v_cndmask_b32_e64 v5, v4, v0, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_sdwa v0, v4, v0, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX90A-NEXT:    v_perm_b32 v0, v0, v5, s8
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v3, v3
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v1, v3, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_sdwa v6, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v0, v0
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX90A-NEXT:    v_cndmask_b32_e64 v5, v3, v0, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v4, v6, vcc
+; GFX90A-NEXT:    v_perm_b32 v0, v6, v0, s8
+; GFX90A-NEXT:    v_perm_b32 v5, v7, v5, s8
+; GFX90A-NEXT:    v_pk_max_f16 v0, v0, v5
 ; GFX90A-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[16:17] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
@@ -13660,21 +13749,26 @@ define void @global_atomic_fmaximum_v2f16_saddr_ret_av_av(ptr addrspace(1) inreg
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX90A-NEXT:    global_load_dword v1, v2, s[16:17] offset:40
-; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX90A-NEXT:    s_mov_b32 s8, 0x5040100
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v3
 ; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
+; GFX90A-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX90A-NEXT:    s_mov_b32 s8, 0x5040100
 ; GFX90A-NEXT:  .LBB256_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_pk_max_f16 v0, v1, v3
-; GFX90A-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX90A-NEXT:    v_cmp_o_f16_e64 s[4:5], v1, v3
-; GFX90A-NEXT:    v_cndmask_b32_e64 v5, v4, v0, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_sdwa v0, v4, v0, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX90A-NEXT:    v_perm_b32 v0, v0, v5, s8
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v3, v3
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v1, v3, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_sdwa v6, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v0, v0
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX90A-NEXT:    v_cndmask_b32_e64 v5, v3, v0, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v4, v6, vcc
+; GFX90A-NEXT:    v_perm_b32 v0, v6, v0, s8
+; GFX90A-NEXT:    v_perm_b32 v5, v7, v5, s8
+; GFX90A-NEXT:    v_pk_max_f16 v0, v0, v5
 ; GFX90A-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[16:17] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
@@ -13733,17 +13827,22 @@ define void @global_atomic_fminimum_v2f16_saddr_ret_a_a(ptr addrspace(1) inreg %
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v3, a0
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v4, 0x7e00
+; GFX90A-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
 ; GFX90A-NEXT:    s_mov_b32 s8, 0x5040100
 ; GFX90A-NEXT:  .LBB257_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_pk_min_f16 v0, v1, v3
-; GFX90A-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX90A-NEXT:    v_cmp_o_f16_e64 s[4:5], v1, v3
-; GFX90A-NEXT:    v_cndmask_b32_e64 v5, v4, v0, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_sdwa v0, v4, v0, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX90A-NEXT:    v_perm_b32 v0, v0, v5, s8
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v3, v3
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v1, v3, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_sdwa v6, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v0, v0
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX90A-NEXT:    v_cndmask_b32_e64 v5, v3, v0, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v4, v6, vcc
+; GFX90A-NEXT:    v_perm_b32 v0, v6, v0, s8
+; GFX90A-NEXT:    v_perm_b32 v5, v7, v5, s8
+; GFX90A-NEXT:    v_pk_min_f16 v0, v0, v5
 ; GFX90A-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[16:17] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
@@ -13800,21 +13899,26 @@ define void @global_atomic_fminimum_v2f16_saddr_ret_av_av(ptr addrspace(1) inreg
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX90A-NEXT:    global_load_dword v1, v2, s[16:17] offset:40
-; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
-; GFX90A-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX90A-NEXT:    s_mov_b32 s8, 0x5040100
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def v3
 ; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
+; GFX90A-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX90A-NEXT:    s_mov_b32 s8, 0x5040100
 ; GFX90A-NEXT:  .LBB258_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    v_pk_min_f16 v0, v1, v3
-; GFX90A-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX90A-NEXT:    v_cmp_o_f16_e64 s[4:5], v1, v3
-; GFX90A-NEXT:    v_cndmask_b32_e64 v5, v4, v0, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_sdwa v0, v4, v0, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX90A-NEXT:    v_perm_b32 v0, v0, v5, s8
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v3, v3
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v1, v3, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_sdwa v6, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX90A-NEXT:    v_cmp_u_f16_e64 s[4:5], v0, v0
+; GFX90A-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX90A-NEXT:    v_cndmask_b32_e64 v5, v3, v0, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v4, v6, vcc
+; GFX90A-NEXT:    v_perm_b32 v0, v6, v0, s8
+; GFX90A-NEXT:    v_perm_b32 v5, v7, v5, s8
+; GFX90A-NEXT:    v_pk_min_f16 v0, v0, v5
 ; GFX90A-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[16:17] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
@@ -14537,32 +14641,35 @@ define void @global_atomic_fmaximum_v2bf16_saddr_ret_a_a(ptr addrspace(1) inreg
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v0, a0
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
-; GFX90A-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
 ; GFX90A-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX90A-NEXT:    v_and_b32_e32 v5, 0xffff0000, v0
+; GFX90A-NEXT:    v_and_b32_e32 v4, 0xffff0000, v0
 ; GFX90A-NEXT:    s_mov_b32 s9, 0x7060302
 ; GFX90A-NEXT:  .LBB267_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
-; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v1
-; GFX90A-NEXT:    v_max_f32_e32 v7, v0, v3
-; GFX90A-NEXT:    v_max_f32_e32 v8, v6, v5
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v6, v5
-; GFX90A-NEXT:    v_cmp_o_f32_e64 s[4:5], v0, v3
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v4, v7, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v4, v8, vcc
-; GFX90A-NEXT:    v_bfe_u32 v7, v0, 16, 1
-; GFX90A-NEXT:    v_bfe_u32 v9, v6, 16, 1
-; GFX90A-NEXT:    v_or_b32_e32 v8, 0x400000, v0
-; GFX90A-NEXT:    v_or_b32_e32 v10, 0x400000, v6
-; GFX90A-NEXT:    v_add3_u32 v7, v7, v0, s8
-; GFX90A-NEXT:    v_add3_u32 v9, v9, v6, s8
-; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v3, v3
+; GFX90A-NEXT:    v_and_b32_e32 v5, 0xffff0000, v1
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v0, v3, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v3, v0, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v4, v5, vcc
+; GFX90A-NEXT:    v_max_f32_e32 v0, v0, v6
+; GFX90A-NEXT:    v_max_f32_e32 v5, v5, v7
+; GFX90A-NEXT:    v_bfe_u32 v6, v0, 16, 1
+; GFX90A-NEXT:    v_bfe_u32 v8, v5, 16, 1
+; GFX90A-NEXT:    v_or_b32_e32 v7, 0x400000, v0
+; GFX90A-NEXT:    v_or_b32_e32 v9, 0x400000, v5
+; GFX90A-NEXT:    v_add3_u32 v6, v6, v0, s8
+; GFX90A-NEXT:    v_add3_u32 v8, v8, v5, s8
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
 ; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v7, v8, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
-; GFX90A-NEXT:    v_perm_b32 v0, v6, v0, s9
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v6, v7, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v5, v8, v9, vcc
+; GFX90A-NEXT:    v_perm_b32 v0, v5, v0, s9
 ; GFX90A-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[16:17] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
@@ -14630,32 +14737,35 @@ define void @global_atomic_fmaximum_v2bf16_saddr_ret_av_av(ptr addrspace(1) inre
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
-; GFX90A-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
 ; GFX90A-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX90A-NEXT:    v_and_b32_e32 v5, 0xffff0000, v0
+; GFX90A-NEXT:    v_and_b32_e32 v4, 0xffff0000, v0
 ; GFX90A-NEXT:    s_mov_b32 s9, 0x7060302
 ; GFX90A-NEXT:  .LBB268_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
-; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v1
-; GFX90A-NEXT:    v_max_f32_e32 v7, v0, v3
-; GFX90A-NEXT:    v_max_f32_e32 v8, v6, v5
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v6, v5
-; GFX90A-NEXT:    v_cmp_o_f32_e64 s[4:5], v0, v3
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v4, v7, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v4, v8, vcc
-; GFX90A-NEXT:    v_bfe_u32 v7, v0, 16, 1
-; GFX90A-NEXT:    v_bfe_u32 v9, v6, 16, 1
-; GFX90A-NEXT:    v_or_b32_e32 v8, 0x400000, v0
-; GFX90A-NEXT:    v_or_b32_e32 v10, 0x400000, v6
-; GFX90A-NEXT:    v_add3_u32 v7, v7, v0, s8
-; GFX90A-NEXT:    v_add3_u32 v9, v9, v6, s8
-; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v3, v3
+; GFX90A-NEXT:    v_and_b32_e32 v5, 0xffff0000, v1
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v0, v3, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v3, v0, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v4, v5, vcc
+; GFX90A-NEXT:    v_max_f32_e32 v0, v0, v6
+; GFX90A-NEXT:    v_max_f32_e32 v5, v5, v7
+; GFX90A-NEXT:    v_bfe_u32 v6, v0, 16, 1
+; GFX90A-NEXT:    v_bfe_u32 v8, v5, 16, 1
+; GFX90A-NEXT:    v_or_b32_e32 v7, 0x400000, v0
+; GFX90A-NEXT:    v_or_b32_e32 v9, 0x400000, v5
+; GFX90A-NEXT:    v_add3_u32 v6, v6, v0, s8
+; GFX90A-NEXT:    v_add3_u32 v8, v8, v5, s8
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
 ; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v7, v8, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
-; GFX90A-NEXT:    v_perm_b32 v0, v6, v0, s9
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v6, v7, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v5, v8, v9, vcc
+; GFX90A-NEXT:    v_perm_b32 v0, v5, v0, s9
 ; GFX90A-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[16:17] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
@@ -14721,32 +14831,35 @@ define void @global_atomic_fminimum_v2bf16_saddr_ret_a_a(ptr addrspace(1) inreg
 ; GFX90A-NEXT:    v_accvgpr_read_b32 v0, a0
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
-; GFX90A-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
 ; GFX90A-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX90A-NEXT:    v_and_b32_e32 v5, 0xffff0000, v0
+; GFX90A-NEXT:    v_and_b32_e32 v4, 0xffff0000, v0
 ; GFX90A-NEXT:    s_mov_b32 s9, 0x7060302
 ; GFX90A-NEXT:  .LBB269_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
-; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v1
-; GFX90A-NEXT:    v_min_f32_e32 v7, v0, v3
-; GFX90A-NEXT:    v_min_f32_e32 v8, v6, v5
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v6, v5
-; GFX90A-NEXT:    v_cmp_o_f32_e64 s[4:5], v0, v3
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v4, v7, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v4, v8, vcc
-; GFX90A-NEXT:    v_bfe_u32 v7, v0, 16, 1
-; GFX90A-NEXT:    v_bfe_u32 v9, v6, 16, 1
-; GFX90A-NEXT:    v_or_b32_e32 v8, 0x400000, v0
-; GFX90A-NEXT:    v_or_b32_e32 v10, 0x400000, v6
-; GFX90A-NEXT:    v_add3_u32 v7, v7, v0, s8
-; GFX90A-NEXT:    v_add3_u32 v9, v9, v6, s8
-; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v3, v3
+; GFX90A-NEXT:    v_and_b32_e32 v5, 0xffff0000, v1
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v0, v3, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v3, v0, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v4, v5, vcc
+; GFX90A-NEXT:    v_min_f32_e32 v0, v0, v6
+; GFX90A-NEXT:    v_min_f32_e32 v5, v5, v7
+; GFX90A-NEXT:    v_bfe_u32 v6, v0, 16, 1
+; GFX90A-NEXT:    v_bfe_u32 v8, v5, 16, 1
+; GFX90A-NEXT:    v_or_b32_e32 v7, 0x400000, v0
+; GFX90A-NEXT:    v_or_b32_e32 v9, 0x400000, v5
+; GFX90A-NEXT:    v_add3_u32 v6, v6, v0, s8
+; GFX90A-NEXT:    v_add3_u32 v8, v8, v5, s8
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
 ; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v7, v8, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
-; GFX90A-NEXT:    v_perm_b32 v0, v6, v0, s9
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v6, v7, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v5, v8, v9, vcc
+; GFX90A-NEXT:    v_perm_b32 v0, v5, v0, s9
 ; GFX90A-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[16:17] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
@@ -14814,32 +14927,35 @@ define void @global_atomic_fminimum_v2bf16_saddr_ret_av_av(ptr addrspace(1) inre
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    s_mov_b64 s[6:7], 0
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
-; GFX90A-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
 ; GFX90A-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX90A-NEXT:    v_and_b32_e32 v5, 0xffff0000, v0
+; GFX90A-NEXT:    v_and_b32_e32 v4, 0xffff0000, v0
 ; GFX90A-NEXT:    s_mov_b32 s9, 0x7060302
 ; GFX90A-NEXT:  .LBB270_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
-; GFX90A-NEXT:    v_and_b32_e32 v6, 0xffff0000, v1
-; GFX90A-NEXT:    v_min_f32_e32 v7, v0, v3
-; GFX90A-NEXT:    v_min_f32_e32 v8, v6, v5
-; GFX90A-NEXT:    v_cmp_o_f32_e32 vcc, v6, v5
-; GFX90A-NEXT:    v_cmp_o_f32_e64 s[4:5], v0, v3
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v4, v7, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v4, v8, vcc
-; GFX90A-NEXT:    v_bfe_u32 v7, v0, 16, 1
-; GFX90A-NEXT:    v_bfe_u32 v9, v6, 16, 1
-; GFX90A-NEXT:    v_or_b32_e32 v8, 0x400000, v0
-; GFX90A-NEXT:    v_or_b32_e32 v10, 0x400000, v6
-; GFX90A-NEXT:    v_add3_u32 v7, v7, v0, s8
-; GFX90A-NEXT:    v_add3_u32 v9, v9, v6, s8
-; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v3, v3
+; GFX90A-NEXT:    v_and_b32_e32 v5, 0xffff0000, v1
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v0, v3, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v3, v0, vcc
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX90A-NEXT:    v_cndmask_b32_e32 v7, v4, v5, vcc
+; GFX90A-NEXT:    v_min_f32_e32 v0, v0, v6
+; GFX90A-NEXT:    v_min_f32_e32 v5, v5, v7
+; GFX90A-NEXT:    v_bfe_u32 v6, v0, 16, 1
+; GFX90A-NEXT:    v_bfe_u32 v8, v5, 16, 1
+; GFX90A-NEXT:    v_or_b32_e32 v7, 0x400000, v0
+; GFX90A-NEXT:    v_or_b32_e32 v9, 0x400000, v5
+; GFX90A-NEXT:    v_add3_u32 v6, v6, v0, s8
+; GFX90A-NEXT:    v_add3_u32 v8, v8, v5, s8
+; GFX90A-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
 ; GFX90A-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
-; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v7, v8, s[4:5]
-; GFX90A-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
-; GFX90A-NEXT:    v_perm_b32 v0, v6, v0, s9
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v6, v7, s[4:5]
+; GFX90A-NEXT:    v_cndmask_b32_e32 v5, v8, v9, vcc
+; GFX90A-NEXT:    v_perm_b32 v0, v5, v0, s9
 ; GFX90A-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[16:17] offset:40 glc
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
index e7685d53b2d10..72644a0a40df2 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx801 -denormal-fp-math-f32=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-FLUSH,GCN-FLUSH %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx801 -denormal-fp-math-f32=ieee < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-DENORM,GCN-DENORM %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-DENORM,GCN-DENORM %s
@@ -7,6 +8,30 @@
 ; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
 ; GFX9: v_max_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 define amdgpu_kernel void @test_no_fold_canonicalize_loaded_value_f32(ptr addrspace(1) %arg) {
+; VI-LABEL: test_no_fold_canonicalize_loaded_value_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v2, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_no_fold_canonicalize_loaded_value_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %v = load float, ptr addrspace(1) %gep, align 4
@@ -20,6 +45,30 @@ define amdgpu_kernel void @test_no_fold_canonicalize_loaded_value_f32(ptr addrsp
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 ; GCN-NOT: 1.0
 define amdgpu_kernel void @test_fold_canonicalize_fmul_value_f32(ptr addrspace(1) %arg) {
+; VI-LABEL: test_fold_canonicalize_fmul_value_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v2, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_mul_f32_e32 v2, 0x41700000, v2
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_fmul_value_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_mul_f32_e32 v1, 0x41700000, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -35,6 +84,30 @@ define amdgpu_kernel void @test_fold_canonicalize_fmul_value_f32(ptr addrspace(1
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fmul_legacy_value_f32(ptr addrspace(1) %arg) {
+; VI-LABEL: test_fold_canonicalize_fmul_legacy_value_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v2, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_mul_legacy_f32_e32 v2, 0x41700000, v2
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_fmul_legacy_value_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_mul_legacy_f32_e32 v1, 0x41700000, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -50,6 +123,30 @@ define amdgpu_kernel void @test_fold_canonicalize_fmul_legacy_value_f32(ptr addr
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_sub_value_f32(ptr addrspace(1) %arg) {
+; VI-LABEL: test_fold_canonicalize_sub_value_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v2, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_sub_f32_e32 v2, 0x41700000, v2
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_sub_value_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_sub_f32_e32 v1, 0x41700000, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -65,6 +162,30 @@ define amdgpu_kernel void @test_fold_canonicalize_sub_value_f32(ptr addrspace(1)
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_add_value_f32(ptr addrspace(1) %arg) {
+; VI-LABEL: test_fold_canonicalize_add_value_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v2, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_add_f32_e32 v2, 0x41700000, v2
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_add_value_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_add_f32_e32 v1, 0x41700000, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -80,6 +201,30 @@ define amdgpu_kernel void @test_fold_canonicalize_add_value_f32(ptr addrspace(1)
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_sqrt_value_f32(ptr addrspace(1) %arg) {
+; VI-LABEL: test_fold_canonicalize_sqrt_value_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v2, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_sqrt_f32_e32 v2, v2
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_sqrt_value_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_sqrt_f32_e32 v1, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -95,6 +240,30 @@ define amdgpu_kernel void @test_fold_canonicalize_sqrt_value_f32(ptr addrspace(1
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fceil_value_f32(ptr addrspace(1) %arg) {
+; VI-LABEL: test_fold_canonicalize_fceil_value_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v2, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_ceil_f32_e32 v2, v2
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_fceil_value_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_ceil_f32_e32 v1, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -110,6 +279,30 @@ define amdgpu_kernel void @test_fold_canonicalize_fceil_value_f32(ptr addrspace(
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_floor_value_f32(ptr addrspace(1) %arg) {
+; VI-LABEL: test_fold_canonicalize_floor_value_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v2, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_floor_f32_e32 v2, v2
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_floor_value_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_floor_f32_e32 v1, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -126,6 +319,32 @@ define amdgpu_kernel void @test_fold_canonicalize_floor_value_f32(ptr addrspace(
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fma_value_f32(ptr addrspace(1) %arg) {
+; VI-LABEL: test_fold_canonicalize_fma_value_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v2, v[0:1]
+; VI-NEXT:    s_mov_b32 s0, 0x41700000
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_fma_f32 v2, v2, s0, s0
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_fma_value_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_mov_b32 s2, 0x41700000
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_fma_f32 v1, v1, s2, s2
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -141,6 +360,32 @@ define amdgpu_kernel void @test_fold_canonicalize_fma_value_f32(ptr addrspace(1)
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fmad_ftz_value_f32(ptr addrspace(1) %arg) {
+; VI-LABEL: test_fold_canonicalize_fmad_ftz_value_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    v_mov_b32_e32 v3, 0x41700000
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v2, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_mac_f32_e32 v3, 0x41700000, v2
+; VI-NEXT:    flat_store_dword v[0:1], v3
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_fmad_ftz_value_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0x41700000
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_mac_f32_e32 v2, 0x41700000, v1
+; GFX9-NEXT:    global_store_dword v0, v2, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -159,6 +404,59 @@ define amdgpu_kernel void @test_fold_canonicalize_fmad_ftz_value_f32(ptr addrspa
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 ; GCN-NOT: 1.0
 define amdgpu_kernel void @test_fold_canonicalize_fmuladd_value_f32(ptr addrspace(1) %arg) {
+; VI-FLUSH-LABEL: test_fold_canonicalize_fmuladd_value_f32:
+; VI-FLUSH:       ; %bb.0:
+; VI-FLUSH-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-FLUSH-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-FLUSH-NEXT:    v_mov_b32_e32 v3, 0x41700000
+; VI-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-FLUSH-NEXT:    v_mov_b32_e32 v1, s1
+; VI-FLUSH-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-FLUSH-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-FLUSH-NEXT:    flat_load_dword v2, v[0:1]
+; VI-FLUSH-NEXT:    s_waitcnt vmcnt(0)
+; VI-FLUSH-NEXT:    v_mac_f32_e32 v3, 0x41700000, v2
+; VI-FLUSH-NEXT:    flat_store_dword v[0:1], v3
+; VI-FLUSH-NEXT:    s_endpgm
+;
+; VI-DENORM-LABEL: test_fold_canonicalize_fmuladd_value_f32:
+; VI-DENORM:       ; %bb.0:
+; VI-DENORM-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-DENORM-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-DENORM-NEXT:    v_mov_b32_e32 v1, s1
+; VI-DENORM-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-DENORM-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-DENORM-NEXT:    flat_load_dword v2, v[0:1]
+; VI-DENORM-NEXT:    s_mov_b32 s0, 0x41700000
+; VI-DENORM-NEXT:    s_waitcnt vmcnt(0)
+; VI-DENORM-NEXT:    v_fma_f32 v2, v2, s0, s0
+; VI-DENORM-NEXT:    flat_store_dword v[0:1], v2
+; VI-DENORM-NEXT:    s_endpgm
+;
+; GFX9-DENORM-LABEL: test_fold_canonicalize_fmuladd_value_f32:
+; GFX9-DENORM:       ; %bb.0:
+; GFX9-DENORM-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-DENORM-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-DENORM-NEXT:    s_mov_b32 s2, 0x41700000
+; GFX9-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT:    v_fma_f32 v1, v1, s2, s2
+; GFX9-DENORM-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-DENORM-NEXT:    s_endpgm
+;
+; GFX9-FLUSH-LABEL: test_fold_canonicalize_fmuladd_value_f32:
+; GFX9-FLUSH:       ; %bb.0:
+; GFX9-FLUSH-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-FLUSH-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLUSH-NEXT:    v_mov_b32_e32 v2, 0x41700000
+; GFX9-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-FLUSH-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT:    v_mac_f32_e32 v2, 0x41700000, v1
+; GFX9-FLUSH-NEXT:    global_store_dword v0, v2, s[0:1]
+; GFX9-FLUSH-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -177,6 +475,30 @@ define amdgpu_kernel void @test_fold_canonicalize_fmuladd_value_f32(ptr addrspac
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_canonicalize_value_f32(ptr addrspace(1) %arg) {
+; VI-LABEL: test_fold_canonicalize_canonicalize_value_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v2, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_canonicalize_value_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -192,6 +514,35 @@ define amdgpu_kernel void @test_fold_canonicalize_canonicalize_value_f32(ptr add
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f64_f32(ptr addrspace(1) %arg, ptr addrspace(1) %out) {
+; VI-LABEL: test_fold_canonicalize_fpextend_value_f64_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v2, s1
+; VI-NEXT:    v_add_u32_e32 v1, vcc, s0, v1
+; VI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; VI-NEXT:    flat_load_dword v1, v[1:2]
+; VI-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    v_add_u32_e32 v2, vcc, s2, v2
+; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_cvt_f64_f32_e32 v[0:1], v1
+; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_fpextend_value_f64_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v1, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_cvt_f64_f32_e32 v[1:2], v1
+; GFX9-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -208,6 +559,35 @@ define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f64_f32(ptr add
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16(ptr addrspace(1) %arg, ptr addrspace(1) %out) {
+; VI-LABEL: test_fold_canonicalize_fpextend_value_f32_f16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v2, s1
+; VI-NEXT:    v_add_u32_e32 v1, vcc, s0, v1
+; VI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; VI-NEXT:    flat_load_ushort v1, v[1:2]
+; VI-NEXT:    v_mov_b32_e32 v2, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_cvt_f32_f16_e32 v3, v1
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v2, vcc
+; VI-NEXT:    flat_store_dword v[0:1], v3
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_fpextend_value_f32_f16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_ushort v1, v1, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[2:3]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds half, ptr addrspace(1) %arg, i32 %id
   %load = load half, ptr addrspace(1) %gep, align 2
@@ -224,6 +604,35 @@ define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16(ptr add
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16_flushf16(ptr addrspace(1) %arg, ptr addrspace(1) %out) #2 {
+; VI-LABEL: test_fold_canonicalize_fpextend_value_f32_f16_flushf16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v2, s1
+; VI-NEXT:    v_add_u32_e32 v1, vcc, s0, v1
+; VI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; VI-NEXT:    flat_load_ushort v1, v[1:2]
+; VI-NEXT:    v_mov_b32_e32 v2, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_cvt_f32_f16_e32 v3, v1
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v2, vcc
+; VI-NEXT:    flat_store_dword v[0:1], v3
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_fpextend_value_f32_f16_flushf16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_ushort v1, v1, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[2:3]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds half, ptr addrspace(1) %arg, i32 %id
   %load = load half, ptr addrspace(1) %gep, align 2
@@ -240,6 +649,35 @@ define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16_flushf1
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f32_f64(ptr addrspace(1) %arg, ptr addrspace(1) %out) {
+; VI-LABEL: test_fold_canonicalize_fpround_value_f32_f64:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v1, 3, v0
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v2, s1
+; VI-NEXT:    v_add_u32_e32 v1, vcc, s0, v1
+; VI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; VI-NEXT:    flat_load_dwordx2 v[1:2], v[1:2]
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_cvt_f32_f64_e32 v2, v[1:2]
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_fpround_value_f32_f64:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 3, v0
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dwordx2 v[1:2], v1, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_f64_e32 v1, v[1:2]
+; GFX9-NEXT:    global_store_dword v0, v1, s[2:3]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds double, ptr addrspace(1) %arg, i32 %id
   %load = load double, ptr addrspace(1) %gep, align 8
@@ -256,6 +694,35 @@ define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f32_f64(ptr addr
 ; GCN-NOT: v_mul
 ; GCN: {{flat|global}}_store_short v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32(ptr addrspace(1) %arg, ptr addrspace(1) %out) {
+; VI-LABEL: test_fold_canonicalize_fpround_value_f16_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v2, s1
+; VI-NEXT:    v_add_u32_e32 v1, vcc, s0, v1
+; VI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; VI-NEXT:    flat_load_dword v1, v[1:2]
+; VI-NEXT:    v_mov_b32_e32 v2, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_cvt_f16_f32_e32 v3, v1
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v2, vcc
+; VI-NEXT:    flat_store_short v[0:1], v3
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_fpround_value_f16_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v1, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GFX9-NEXT:    global_store_short v0, v1, s[2:3]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -272,6 +739,35 @@ define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32(ptr addr
 ; GCN-NOT: v_mul
 ; GCN: {{flat|global}}_store_short v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32_flushf16(ptr addrspace(1) %arg, ptr addrspace(1) %out) #2 {
+; VI-LABEL: test_fold_canonicalize_fpround_value_f16_f32_flushf16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v2, s1
+; VI-NEXT:    v_add_u32_e32 v1, vcc, s0, v1
+; VI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; VI-NEXT:    flat_load_dword v1, v[1:2]
+; VI-NEXT:    v_mov_b32_e32 v2, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_cvt_f16_f32_e32 v3, v1
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v2, vcc
+; VI-NEXT:    flat_store_short v[0:1], v3
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_fpround_value_f16_f32_flushf16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v1, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GFX9-NEXT:    global_store_short v0, v1, s[2:3]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -292,6 +788,39 @@ define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32_flushf16
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fpround_value_v2f16_v2f32(ptr addrspace(1) %arg, ptr addrspace(1) %out) {
+; VI-LABEL: test_fold_canonicalize_fpround_value_v2f16_v2f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v1, 3, v0
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v2, s1
+; VI-NEXT:    v_add_u32_e32 v1, vcc, s0, v1
+; VI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; VI-NEXT:    flat_load_dwordx2 v[1:2], v[1:2]
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; VI-NEXT:    v_cvt_f16_f32_sdwa v2, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-NEXT:    v_or_b32_e32 v2, v1, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_fpround_value_v2f16_v2f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 3, v0
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dwordx2 v[1:2], v1, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GFX9-NEXT:    v_pack_b32_f16 v1, v1, v2
+; GFX9-NEXT:    global_store_dword v0, v1, s[2:3]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %arg, i32 %id
   %load = load <2 x float>, ptr addrspace(1) %gep, align 8
@@ -306,6 +835,30 @@ define amdgpu_kernel void @test_fold_canonicalize_fpround_value_v2f16_v2f32(ptr
 ; VI:  v_mul_f32_e32 v{{[0-9]+}}, -1.0, v{{[0-9]+}}
 ; GFX9: v_max_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
 define amdgpu_kernel void @test_no_fold_canonicalize_fneg_value_f32(ptr addrspace(1) %arg) {
+; VI-LABEL: test_no_fold_canonicalize_fneg_value_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v2, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_mul_f32_e32 v2, -1.0, v2
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_no_fold_canonicalize_fneg_value_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_max_f32_e64 v1, -v1, -v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -321,6 +874,32 @@ define amdgpu_kernel void @test_no_fold_canonicalize_fneg_value_f32(ptr addrspac
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fneg_value_f32(ptr addrspace(1) %arg) {
+; VI-LABEL: test_fold_canonicalize_fneg_value_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v2, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_add_f32_e32 v2, 0, v2
+; VI-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_fneg_value_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_add_f32_e32 v1, 0, v1
+; GFX9-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -335,6 +914,30 @@ define amdgpu_kernel void @test_fold_canonicalize_fneg_value_f32(ptr addrspace(1
 ; VI:  v_mul_f32_e64 v{{[0-9]+}}, 1.0, |v{{[0-9]+}}|
 ; GFX9: v_max_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |v{{[0-9]+}}|
 define amdgpu_kernel void @test_no_fold_canonicalize_fabs_value_f32(ptr addrspace(1) %arg) {
+; VI-LABEL: test_no_fold_canonicalize_fabs_value_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v2, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_mul_f32_e64 v2, 1.0, |v2|
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_no_fold_canonicalize_fabs_value_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_max_f32_e64 v1, |v1|, |v1|
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -351,6 +954,30 @@ define amdgpu_kernel void @test_no_fold_canonicalize_fabs_value_f32(ptr addrspac
 ; GCN-NOT: v_mul_
 ; GCN-NOT: v_max_
 define amdgpu_kernel void @test_no_fold_canonicalize_fcopysign_value_f32(ptr addrspace(1) %arg, float %sign) {
+; VI-LABEL: test_no_fold_canonicalize_fcopysign_value_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v2, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_mul_f32_e64 v2, 1.0, |v2|
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_no_fold_canonicalize_fcopysign_value_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_max_f32_e64 v1, |v1|, |v1|
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -368,6 +995,32 @@ define amdgpu_kernel void @test_no_fold_canonicalize_fcopysign_value_f32(ptr add
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fabs_value_f32(ptr addrspace(1) %arg) {
+; VI-LABEL: test_fold_canonicalize_fabs_value_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v2, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_add_f32_e32 v2, 0, v2
+; VI-NEXT:    v_and_b32_e32 v2, 0x7fffffff, v2
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_fabs_value_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_add_f32_e32 v1, 0, v1
+; GFX9-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -384,6 +1037,33 @@ define amdgpu_kernel void @test_fold_canonicalize_fabs_value_f32(ptr addrspace(1
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_sin_value_f32(ptr addrspace(1) %arg) {
+; VI-LABEL: test_fold_canonicalize_sin_value_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v2, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_mul_f32_e32 v2, 0.15915494, v2
+; VI-NEXT:    v_fract_f32_e32 v2, v2
+; VI-NEXT:    v_sin_f32_e32 v2, v2
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_sin_value_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_mul_f32_e32 v1, 0.15915494, v1
+; GFX9-NEXT:    v_sin_f32_e32 v1, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -399,6 +1079,33 @@ define amdgpu_kernel void @test_fold_canonicalize_sin_value_f32(ptr addrspace(1)
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_cos_value_f32(ptr addrspace(1) %arg) {
+; VI-LABEL: test_fold_canonicalize_cos_value_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v2, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_mul_f32_e32 v2, 0.15915494, v2
+; VI-NEXT:    v_fract_f32_e32 v2, v2
+; VI-NEXT:    v_cos_f32_e32 v2, v2
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_cos_value_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_mul_f32_e32 v1, 0.15915494, v1
+; GFX9-NEXT:    v_cos_f32_e32 v1, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -414,6 +1121,33 @@ define amdgpu_kernel void @test_fold_canonicalize_cos_value_f32(ptr addrspace(1)
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_short v{{.+}}, [[V0]]
 define amdgpu_kernel void @test_fold_canonicalize_sin_value_f16(ptr addrspace(1) %arg) {
+; VI-LABEL: test_fold_canonicalize_sin_value_f16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_ushort v2, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_mul_f16_e32 v2, 0.15915494, v2
+; VI-NEXT:    v_fract_f16_e32 v2, v2
+; VI-NEXT:    v_sin_f16_e32 v2, v2
+; VI-NEXT:    flat_store_short v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_sin_value_f16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_ushort v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_mul_f16_e32 v1, 0.15915494, v1
+; GFX9-NEXT:    v_sin_f16_e32 v1, v1
+; GFX9-NEXT:    global_store_short v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds half, ptr addrspace(1) %arg, i32 %id
   %load = load half, ptr addrspace(1) %gep, align 2
@@ -429,6 +1163,33 @@ define amdgpu_kernel void @test_fold_canonicalize_sin_value_f16(ptr addrspace(1)
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_short v{{.+}}, [[V0]]
 define amdgpu_kernel void @test_fold_canonicalize_cos_value_f16(ptr addrspace(1) %arg) {
+; VI-LABEL: test_fold_canonicalize_cos_value_f16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_ushort v2, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_mul_f16_e32 v2, 0.15915494, v2
+; VI-NEXT:    v_fract_f16_e32 v2, v2
+; VI-NEXT:    v_cos_f16_e32 v2, v2
+; VI-NEXT:    flat_store_short v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_cos_value_f16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_ushort v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_mul_f16_e32 v1, 0.15915494, v1
+; GFX9-NEXT:    v_cos_f16_e32 v1, v1
+; GFX9-NEXT:    global_store_short v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds half, ptr addrspace(1) %arg, i32 %id
   %load = load half, ptr addrspace(1) %gep, align 2
@@ -444,6 +1205,26 @@ define amdgpu_kernel void @test_fold_canonicalize_cos_value_f16(ptr addrspace(1)
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_qNaN_value_f32(ptr addrspace(1) %arg) {
+; VI-LABEL: test_fold_canonicalize_qNaN_value_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_qNaN_value_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %canonicalized = tail call float @llvm.canonicalize.f32(float 0x7FF8000000000000)
@@ -462,6 +1243,32 @@ define amdgpu_kernel void @test_fold_canonicalize_qNaN_value_f32(ptr addrspace(1
 
 ; GFX9: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_minnum_value_from_load_f32_ieee_mode(ptr addrspace(1) %arg) {
+; VI-LABEL: test_fold_canonicalize_minnum_value_from_load_f32_ieee_mode:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v2, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-NEXT:    v_min_f32_e32 v2, 0, v2
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_minnum_value_from_load_f32_ieee_mode:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-NEXT:    v_min_f32_e32 v1, 0, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -487,6 +1294,32 @@ define amdgpu_kernel void @test_fold_canonicalize_minnum_value_from_load_f32_iee
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_minnum_value_f32(ptr addrspace(1) %arg) {
+; VI-LABEL: test_fold_canonicalize_minnum_value_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v2, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_add_f32_e32 v2, 0, v2
+; VI-NEXT:    v_min_f32_e32 v2, 0, v2
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_minnum_value_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_add_f32_e32 v1, 0, v1
+; GFX9-NEXT:    v_min_f32_e32 v1, 0, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -504,6 +1337,32 @@ define amdgpu_kernel void @test_fold_canonicalize_minnum_value_f32(ptr addrspace
 ; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[LOAD]]
 ; GFX9: v_max_f32_e32 v{{[0-9]+}}, [[LOAD]], [[LOAD]]
 define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(ptr addrspace(1) %arg) {
+; VI-LABEL: test_fold_canonicalize_sNaN_value_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v2, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-NEXT:    v_min_f32_e32 v2, 0x7fc00000, v2
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_sNaN_value_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-NEXT:    v_min_f32_e32 v1, 0x7fc00000, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -537,6 +1396,32 @@ define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(ptr addrspace(1
 ; GCN-NOT: v_max
 ; GCN:  {{flat|global}}_store_dword v{{.+}}, [[RESULT]]
 define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_from_load_f32_ieee_mode(ptr addrspace(1) %arg) {
+; VI-LABEL: test_fold_canonicalize_maxnum_value_from_load_f32_ieee_mode:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v2, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-NEXT:    v_max_f32_e32 v2, 0, v2
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_maxnum_value_from_load_f32_ieee_mode:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-NEXT:    v_max_f32_e32 v1, 0, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -552,6 +1437,32 @@ define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_from_load_f32_iee
 ; GCN-NOT: v_mul
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f32(ptr addrspace(1) %arg) {
+; VI-LABEL: test_fold_canonicalize_maxnum_value_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v2, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_add_f32_e32 v2, 0, v2
+; VI-NEXT:    v_max_f32_e32 v2, 0, v2
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_maxnum_value_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_add_f32_e32 v1, 0, v1
+; GFX9-NEXT:    v_max_f32_e32 v1, 0, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -568,6 +1479,32 @@ define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f32(ptr addrspace
 ; GCN-NOT: v_max
 ; GCN:  {{flat|global}}_store_dwordx2 v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f64(ptr addrspace(1) %arg) {
+; VI-LABEL: test_fold_canonicalize_maxnum_value_f64:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_add_f64 v[2:3], v[2:3], 0
+; VI-NEXT:    v_max_f64 v[2:3], v[2:3], 0
+; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_maxnum_value_f64:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dwordx2 v[0:1], v2, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_add_f64 v[0:1], v[0:1], 0
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], 0
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds double, ptr addrspace(1) %arg, i32 %id
   %load = load double, ptr addrspace(1) %gep, align 8
@@ -584,6 +1521,10 @@ define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f64(ptr addrspace
 ; GCN-NOT: v_max
 ; GCN-NEXT: ; return
 define amdgpu_ps float @test_fold_canonicalize_fmul_value_f32_no_ieee(float %arg) {
+; GCN-LABEL: test_fold_canonicalize_fmul_value_f32_no_ieee:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    v_mul_f32_e32 v0, 0x41700000, v0
+; GCN-NEXT:    ; return to shader part epilog
 entry:
   %v = fmul float %arg, 15.0
   %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
@@ -596,6 +1537,10 @@ entry:
 ; GCN-NOT: v_max
 ; GCN-NEXT: ; return
 define amdgpu_ps float @test_fold_canonicalize_fmul_nnan_value_f32_no_ieee(float %arg) {
+; GCN-LABEL: test_fold_canonicalize_fmul_nnan_value_f32_no_ieee:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    v_mul_f32_e32 v0, 0x41700000, v0
+; GCN-NEXT:    ; return to shader part epilog
 entry:
   %v = fmul nnan float %arg, 15.0
   %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
@@ -608,6 +1553,73 @@ entry:
 ; GCN-NOT: v_mul
 ; GCN: ; return
 define amdgpu_ps float @test_fold_canonicalize_fdiv_value_f32_no_ieee(float %arg0) {
+; VI-FLUSH-LABEL: test_fold_canonicalize_fdiv_value_f32_no_ieee:
+; VI-FLUSH:       ; %bb.0: ; %entry
+; VI-FLUSH-NEXT:    s_mov_b32 s2, 0x41700000
+; VI-FLUSH-NEXT:    v_div_scale_f32 v1, s[0:1], v0, v0, s2
+; VI-FLUSH-NEXT:    v_div_scale_f32 v2, vcc, s2, v0, s2
+; VI-FLUSH-NEXT:    v_rcp_f32_e32 v3, v1
+; VI-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; VI-FLUSH-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
+; VI-FLUSH-NEXT:    v_fma_f32 v3, v4, v3, v3
+; VI-FLUSH-NEXT:    v_mul_f32_e32 v4, v2, v3
+; VI-FLUSH-NEXT:    v_fma_f32 v5, -v1, v4, v2
+; VI-FLUSH-NEXT:    v_fma_f32 v4, v5, v3, v4
+; VI-FLUSH-NEXT:    v_fma_f32 v1, -v1, v4, v2
+; VI-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
+; VI-FLUSH-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
+; VI-FLUSH-NEXT:    v_div_fixup_f32 v0, v1, v0, s2
+; VI-FLUSH-NEXT:    ; return to shader part epilog
+;
+; VI-DENORM-LABEL: test_fold_canonicalize_fdiv_value_f32_no_ieee:
+; VI-DENORM:       ; %bb.0: ; %entry
+; VI-DENORM-NEXT:    s_mov_b32 s2, 0x41700000
+; VI-DENORM-NEXT:    v_div_scale_f32 v1, s[0:1], v0, v0, s2
+; VI-DENORM-NEXT:    v_div_scale_f32 v2, vcc, s2, v0, s2
+; VI-DENORM-NEXT:    v_rcp_f32_e32 v3, v1
+; VI-DENORM-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
+; VI-DENORM-NEXT:    v_fma_f32 v3, v4, v3, v3
+; VI-DENORM-NEXT:    v_mul_f32_e32 v4, v2, v3
+; VI-DENORM-NEXT:    v_fma_f32 v5, -v1, v4, v2
+; VI-DENORM-NEXT:    v_fma_f32 v4, v5, v3, v4
+; VI-DENORM-NEXT:    v_fma_f32 v1, -v1, v4, v2
+; VI-DENORM-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
+; VI-DENORM-NEXT:    v_div_fixup_f32 v0, v1, v0, s2
+; VI-DENORM-NEXT:    ; return to shader part epilog
+;
+; GFX9-DENORM-LABEL: test_fold_canonicalize_fdiv_value_f32_no_ieee:
+; GFX9-DENORM:       ; %bb.0: ; %entry
+; GFX9-DENORM-NEXT:    s_mov_b32 s2, 0x41700000
+; GFX9-DENORM-NEXT:    v_div_scale_f32 v1, s[0:1], v0, v0, s2
+; GFX9-DENORM-NEXT:    v_div_scale_f32 v2, vcc, s2, v0, s2
+; GFX9-DENORM-NEXT:    v_rcp_f32_e32 v3, v1
+; GFX9-DENORM-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
+; GFX9-DENORM-NEXT:    v_fma_f32 v3, v4, v3, v3
+; GFX9-DENORM-NEXT:    v_mul_f32_e32 v4, v2, v3
+; GFX9-DENORM-NEXT:    v_fma_f32 v5, -v1, v4, v2
+; GFX9-DENORM-NEXT:    v_fma_f32 v4, v5, v3, v4
+; GFX9-DENORM-NEXT:    v_fma_f32 v1, -v1, v4, v2
+; GFX9-DENORM-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
+; GFX9-DENORM-NEXT:    v_div_fixup_f32 v0, v1, v0, s2
+; GFX9-DENORM-NEXT:    ; return to shader part epilog
+;
+; GFX9-FLUSH-LABEL: test_fold_canonicalize_fdiv_value_f32_no_ieee:
+; GFX9-FLUSH:       ; %bb.0: ; %entry
+; GFX9-FLUSH-NEXT:    s_mov_b32 s2, 0x41700000
+; GFX9-FLUSH-NEXT:    v_div_scale_f32 v1, s[0:1], v0, v0, s2
+; GFX9-FLUSH-NEXT:    v_div_scale_f32 v2, vcc, s2, v0, s2
+; GFX9-FLUSH-NEXT:    v_rcp_f32_e32 v3, v1
+; GFX9-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX9-FLUSH-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
+; GFX9-FLUSH-NEXT:    v_fma_f32 v3, v4, v3, v3
+; GFX9-FLUSH-NEXT:    v_mul_f32_e32 v4, v2, v3
+; GFX9-FLUSH-NEXT:    v_fma_f32 v5, -v1, v4, v2
+; GFX9-FLUSH-NEXT:    v_fma_f32 v4, v5, v3, v4
+; GFX9-FLUSH-NEXT:    v_fma_f32 v1, -v1, v4, v2
+; GFX9-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
+; GFX9-FLUSH-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
+; GFX9-FLUSH-NEXT:    v_div_fixup_f32 v0, v1, v0, s2
+; GFX9-FLUSH-NEXT:    ; return to shader part epilog
 entry:
   %v = fdiv float 15.0, %arg0
   %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
@@ -622,6 +1634,33 @@ entry:
 ; VI-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
 ; GFX9-FLUSH: v_max_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f32(ptr addrspace(1) %arg, ptr addrspace(1) %out) #1 {
+; VI-LABEL: test_fold_canonicalize_load_nnan_value_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v0, v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_mul_f32_e32 v3, 1.0, v0
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_store_dword v[0:1], v3
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_load_nnan_value_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[2:3]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %v = load float, ptr addrspace(1) %gep, align 4
@@ -637,6 +1676,33 @@ define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f32(ptr addrsp
 ; GCN-NOT: v_mul_
 ; GCN-NOT: v_max_
 define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f64(ptr addrspace(1) %arg, ptr addrspace(1) %out) #1 {
+; VI-LABEL: test_fold_canonicalize_load_nnan_value_f64:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    v_add_u32_e32 v2, vcc, s2, v2
+; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
+; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_load_nnan_value_f64:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dwordx2 v[0:1], v2, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds double, ptr addrspace(1) %arg, i32 %id
   %v = load double, ptr addrspace(1) %gep, align 8
@@ -651,6 +1717,33 @@ define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f64(ptr addrsp
 ; GCN: v_max_f16_e32 [[V2:v[0-9]+]], [[V1]], [[V1]]
 ; GCN: {{flat|global}}_store_short v{{.+}}, [[V2]]
 define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f16(ptr addrspace(1) %arg, ptr addrspace(1) %out) #1 {
+; VI-LABEL: test_fold_canonicalize_load_nnan_value_f16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v2, 1, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_ushort v0, v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_max_f16_e32 v3, v0, v0
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_store_short v[0:1], v3
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_load_nnan_value_f16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_ushort v1, v0, s[0:1]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_max_f16_e32 v1, v1, v1
+; GFX9-NEXT:    global_store_short v0, v1, s[2:3]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds half, ptr addrspace(1) %arg, i32 %id
   %v = load half, ptr addrspace(1) %gep, align 2
@@ -667,6 +1760,45 @@ define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f16(ptr addrsp
 ; GCN-NOT: v_mul_
 ; GCN-NOT: v_max_
 define amdgpu_kernel void @test_fold_canonicalize_select_value_f32(ptr addrspace(1) %arg) {
+; VI-LABEL: test_fold_canonicalize_select_value_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v2, v[0:1] glc
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    flat_load_dword v3, v[0:1] glc
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    flat_load_dword v4, v[0:1] glc
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_add_f32_e32 v2, 0x41700000, v2
+; VI-NEXT:    v_add_f32_e32 v3, 0x42000000, v3
+; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
+; VI-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_fold_canonicalize_select_value_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[0:1] glc
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    global_load_dword v2, v0, s[0:1] glc
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; kill: killed $vgpr0_vgpr1
+; GFX9-NEXT:    global_load_dword v3, v[0:1], off glc
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_add_f32_e32 v1, 0x41700000, v1
+; GFX9-NEXT:    v_add_f32_e32 v2, 0x42000000, v2
+; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load0 = load volatile float, ptr addrspace(1) %gep, align 4
@@ -699,6 +1831,21 @@ define amdgpu_kernel void @test_fold_canonicalize_select_value_f32(ptr addrspace
 ; VI-DENORM: v_min_f32_e32 v0, v0, v1
 ; VI-DENORM-NEXT: ; return
 define amdgpu_ps float @test_fold_canonicalize_minnum_value_no_ieee_mode(float %arg0, float %arg1) {
+; VI-FLUSH-LABEL: test_fold_canonicalize_minnum_value_no_ieee_mode:
+; VI-FLUSH:       ; %bb.0:
+; VI-FLUSH-NEXT:    v_min_f32_e32 v0, v0, v1
+; VI-FLUSH-NEXT:    v_mul_f32_e32 v0, 1.0, v0
+; VI-FLUSH-NEXT:    ; return to shader part epilog
+;
+; VI-DENORM-LABEL: test_fold_canonicalize_minnum_value_no_ieee_mode:
+; VI-DENORM:       ; %bb.0:
+; VI-DENORM-NEXT:    v_min_f32_e32 v0, v0, v1
+; VI-DENORM-NEXT:    ; return to shader part epilog
+;
+; GFX9-LABEL: test_fold_canonicalize_minnum_value_no_ieee_mode:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    ; return to shader part epilog
   %v = tail call float @llvm.minnum.f32(float %arg0, float %arg1)
   %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
   ret float %canonicalized
@@ -714,6 +1861,21 @@ define amdgpu_ps float @test_fold_canonicalize_minnum_value_no_ieee_mode(float %
 
 ; VI-NEXT: s_setpc_b64
 define float @test_fold_canonicalize_minnum_value_ieee_mode(float %arg0, float %arg1) {
+; VI-LABEL: test_fold_canonicalize_minnum_value_ieee_mode:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_mul_f32_e32 v1, 1.0, v1
+; VI-NEXT:    v_mul_f32_e32 v0, 1.0, v0
+; VI-NEXT:    v_min_f32_e32 v0, v0, v1
+; VI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: test_fold_canonicalize_minnum_value_ieee_mode:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %v = tail call float @llvm.minnum.f32(float %arg0, float %arg1)
   %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
   ret float %canonicalized
@@ -725,6 +1887,21 @@ define float @test_fold_canonicalize_minnum_value_ieee_mode(float %arg0, float %
 ; VI-FLUSH-NEXT: v_mul_f32_e32 v0, 1.0, v0
 ; GCN-NEXT: ; return
 define amdgpu_ps float @test_fold_canonicalize_minnum_value_no_ieee_mode_nnan(float %arg0, float %arg1) #1 {
+; VI-FLUSH-LABEL: test_fold_canonicalize_minnum_value_no_ieee_mode_nnan:
+; VI-FLUSH:       ; %bb.0:
+; VI-FLUSH-NEXT:    v_min_f32_e32 v0, v0, v1
+; VI-FLUSH-NEXT:    v_mul_f32_e32 v0, 1.0, v0
+; VI-FLUSH-NEXT:    ; return to shader part epilog
+;
+; VI-DENORM-LABEL: test_fold_canonicalize_minnum_value_no_ieee_mode_nnan:
+; VI-DENORM:       ; %bb.0:
+; VI-DENORM-NEXT:    v_min_f32_e32 v0, v0, v1
+; VI-DENORM-NEXT:    ; return to shader part epilog
+;
+; GFX9-LABEL: test_fold_canonicalize_minnum_value_no_ieee_mode_nnan:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    ; return to shader part epilog
   %v = tail call float @llvm.minnum.f32(float %arg0, float %arg1)
   %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
   ret float %canonicalized
@@ -736,6 +1913,22 @@ define amdgpu_ps float @test_fold_canonicalize_minnum_value_no_ieee_mode_nnan(fl
 ; GFX9-NOT: v_max
 ; GFX9-NOT: v_pk_max
 define <2 x half> @v_test_canonicalize_build_vector_v2f16(<2 x half> %vec) {
+; VI-LABEL: v_test_canonicalize_build_vector_v2f16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v2, 0x4400
+; VI-NEXT:    v_add_f16_e32 v1, 1.0, v0
+; VI-NEXT:    v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-NEXT:    v_or_b32_e32 v0, v1, v0
+; VI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_test_canonicalize_build_vector_v2f16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_f16_e32 v1, 1.0, v0
+; GFX9-NEXT:    v_mul_f16_e32 v0, 4.0, v0
+; GFX9-NEXT:    v_pack_b32_f16 v0, v1, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %lo = extractelement <2 x half> %vec, i32 0
   %hi = extractelement <2 x half> %vec, i32 1
   %lo.op = fadd half %lo, 1.0
@@ -750,6 +1943,22 @@ define <2 x half> @v_test_canonicalize_build_vector_v2f16(<2 x half> %vec) {
 ; GFX9: v_add_f16_e32
 ; GFX9: v_pk_max
 define <2 x half> @v_test_canonicalize_build_vector_noncanon1_v2f16(<2 x half> %vec) {
+; VI-LABEL: v_test_canonicalize_build_vector_noncanon1_v2f16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_add_f16_e32 v1, 1.0, v0
+; VI-NEXT:    v_max_f16_sdwa v0, v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT:    v_or_b32_e32 v0, v1, v0
+; VI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_test_canonicalize_build_vector_noncanon1_v2f16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_f16_e32 v1, 1.0, v0
+; GFX9-NEXT:    s_mov_b32 s4, 0xffff
+; GFX9-NEXT:    v_bfi_b32 v0, s4, v1, v0
+; GFX9-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %lo = extractelement <2 x half> %vec, i32 0
   %lo.op = fadd half %lo, 1.0
   %ins = insertelement <2 x half> %vec, half %lo.op, i32 0
@@ -761,6 +1970,24 @@ define <2 x half> @v_test_canonicalize_build_vector_noncanon1_v2f16(<2 x half> %
 ; GFX9: v_add_f16_sdwa
 ; GFX9: v_pk_max
 define <2 x half> @v_test_canonicalize_build_vector_noncanon0_v2f16(<2 x half> %vec) {
+; VI-LABEL: v_test_canonicalize_build_vector_noncanon0_v2f16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, 0x3c00
+; VI-NEXT:    v_add_f16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT:    v_max_f16_e32 v0, v0, v0
+; VI-NEXT:    v_or_b32_e32 v0, v0, v1
+; VI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_test_canonicalize_build_vector_noncanon0_v2f16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v1, 0x3c00
+; GFX9-NEXT:    v_add_f16_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    s_mov_b32 s4, 0x5040100
+; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %hi = extractelement <2 x half> %vec, i32 1
   %hi.op = fadd half %hi, 1.0
   %ins = insertelement <2 x half> %vec, half %hi.op, i32 1
@@ -773,6 +2000,11 @@ define <2 x half> @v_test_canonicalize_build_vector_noncanon0_v2f16(<2 x half> %
 ; GFX9-NEXT: v_mul_f16_e32 v0, 4.0, v0
 ; GFX9-NEXT: s_setpc_b64
 define half @v_test_canonicalize_extract_element_v2f16(<2 x half> %vec) {
+; GCN-LABEL: v_test_canonicalize_extract_element_v2f16:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mul_f16_e32 v0, 4.0, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %vec.op = fmul <2 x half> %vec, <half 4.0, half 4.0>
   %elt = extractelement <2 x half> %vec.op, i32 0
   %canonicalized = call half @llvm.canonicalize.f16(half %elt)
@@ -792,6 +2024,32 @@ define half @v_test_canonicalize_extract_element_v2f16(<2 x half> %vec) {
 ; GFX9: v_pk_max_f16 v0, v0, v0
 ; GFX9-NEXT: s_setpc_b64
 define <2 x half> @v_test_canonicalize_insertelement_noncanon_vec_v2f16(<2 x half> %vec, half %val, i32 %idx) {
+; VI-LABEL: v_test_canonicalize_insertelement_noncanon_vec_v2f16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_mul_f16_e32 v1, 0x4800, v1
+; VI-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
+; VI-NEXT:    v_lshlrev_b32_e32 v2, 4, v2
+; VI-NEXT:    s_mov_b32 s4, 0xffff
+; VI-NEXT:    v_or_b32_e32 v1, v1, v3
+; VI-NEXT:    v_lshlrev_b32_e64 v2, v2, s4
+; VI-NEXT:    v_bfi_b32 v0, v2, v1, v0
+; VI-NEXT:    v_max_f16_sdwa v1, v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT:    v_max_f16_e32 v0, v0, v0
+; VI-NEXT:    v_or_b32_e32 v0, v0, v1
+; VI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_test_canonicalize_insertelement_noncanon_vec_v2f16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mul_f16_e32 v1, 0x4800, v1
+; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 4, v2
+; GFX9-NEXT:    s_mov_b32 s4, 0xffff
+; GFX9-NEXT:    v_pack_b32_f16 v1, v1, v1
+; GFX9-NEXT:    v_lshlrev_b32_e64 v2, v2, s4
+; GFX9-NEXT:    v_bfi_b32 v0, v2, v1, v0
+; GFX9-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %ins.op = fmul half %val, 8.0
   %ins = insertelement <2 x half> %vec, half %ins.op, i32 %idx
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins)
@@ -803,6 +2061,37 @@ define <2 x half> @v_test_canonicalize_insertelement_noncanon_vec_v2f16(<2 x hal
 ; GFX9: v_pk_max_f16 v0, v0, v0
 ; GFX9-NEXT: s_setpc_b64
 define <2 x half> @v_test_canonicalize_insertelement_noncanon_insval_v2f16(<2 x half> %vec, half %val, i32 %idx) {
+; VI-LABEL: v_test_canonicalize_insertelement_noncanon_insval_v2f16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v3, 0x4400
+; VI-NEXT:    v_mul_f16_sdwa v3, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT:    v_mul_f16_e32 v0, 4.0, v0
+; VI-NEXT:    v_or_b32_e32 v0, v0, v3
+; VI-NEXT:    v_mov_b32_e32 v3, 16
+; VI-NEXT:    s_mov_b32 s4, 0xffff
+; VI-NEXT:    v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI-NEXT:    v_lshlrev_b32_e32 v2, 4, v2
+; VI-NEXT:    v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_lshlrev_b32_e64 v2, v2, s4
+; VI-NEXT:    v_bfi_b32 v0, v2, v1, v0
+; VI-NEXT:    v_max_f16_sdwa v1, v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT:    v_max_f16_e32 v0, v0, v0
+; VI-NEXT:    v_or_b32_e32 v0, v0, v1
+; VI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_test_canonicalize_insertelement_noncanon_insval_v2f16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s4, 0x5040100
+; GFX9-NEXT:    v_perm_b32 v1, v1, v1, s4
+; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 4, v2
+; GFX9-NEXT:    s_mov_b32 s4, 0xffff
+; GFX9-NEXT:    v_pk_mul_f16 v0, v0, 4.0 op_sel_hi:[1,0]
+; GFX9-NEXT:    v_lshlrev_b32_e64 v2, v2, s4
+; GFX9-NEXT:    v_bfi_b32 v0, v2, v1, v0
+; GFX9-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %vec.op = fmul <2 x half> %vec, <half 4.0, half 4.0>
   %ins = insertelement <2 x half> %vec.op, half %val, i32 %idx
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins)
@@ -820,6 +2109,11 @@ define <2 x half> @v_test_canonicalize_insertelement_noncanon_insval_v2f16(<2 x
 ; GCN-NEXT: v_cubeid_f32 v0, v0, v1, v2
 ; GCN-NEXT: s_setpc_b64
 define float @v_test_canonicalize_cubeid(float %a, float %b, float %c) {
+; GCN-LABEL: v_test_canonicalize_cubeid:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_cubeid_f32 v0, v0, v1, v2
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %cvt = call float @llvm.amdgcn.cubeid(float %a, float %b, float %c)
   %canonicalized = call float @llvm.canonicalize.f32(float %cvt)
   ret float %canonicalized
@@ -830,6 +2124,11 @@ define float @v_test_canonicalize_cubeid(float %a, float %b, float %c) {
 ; GCN-NEXT: v_frexp_mant_f32_e32 v0, v0
 ; GCN-NEXT: s_setpc_b64
 define float @v_test_canonicalize_frexp_mant(float %a) {
+; GCN-LABEL: v_test_canonicalize_frexp_mant:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_frexp_mant_f32_e32 v0, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %cvt = call float @llvm.amdgcn.frexp.mant.f32(float %a)
   %canonicalized = call float @llvm.canonicalize.f32(float %cvt)
   ret float %canonicalized
@@ -840,6 +2139,11 @@ define float @v_test_canonicalize_frexp_mant(float %a) {
 ; GCN-NEXT: v_log_f32
 ; GCN-NEXT: s_setpc_b64
 define float @v_test_canonicalize_amdgcn_log(float %a) {
+; GCN-LABEL: v_test_canonicalize_amdgcn_log:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_log_f32_e32 v0, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %log = call float @llvm.amdgcn.log.f32(float %a)
   %canonicalized = call float @llvm.canonicalize.f32(float %log)
   ret float %canonicalized
@@ -850,6 +2154,11 @@ define float @v_test_canonicalize_amdgcn_log(float %a) {
 ; GCN-NEXT: v_exp_f32
 ; GCN-NEXT: s_setpc_b64
 define float @v_test_canonicalize_amdgcn_exp2(float %a) {
+; GCN-LABEL: v_test_canonicalize_amdgcn_exp2:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_exp_f32_e32 v0, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %log = call float @llvm.amdgcn.exp2.f32(float %a)
   %canonicalized = call float @llvm.canonicalize.f32(float %log)
   ret float %canonicalized
@@ -864,6 +2173,36 @@ define float @v_test_canonicalize_amdgcn_exp2(float %a) {
 ; VI-FLUSH-NEXT: v_mul_f32_e32 v0, 1.0, v0
 ; GCN-NEXT: s_setpc_b64
 define float @v_test_canonicalize_minimum(float %a, float %b) {
+; VI-FLUSH-LABEL: v_test_canonicalize_minimum:
+; VI-FLUSH:       ; %bb.0:
+; VI-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-FLUSH-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; VI-FLUSH-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-FLUSH-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; VI-FLUSH-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; VI-FLUSH-NEXT:    v_min_f32_e32 v0, v0, v1
+; VI-FLUSH-NEXT:    v_mul_f32_e32 v0, 1.0, v0
+; VI-FLUSH-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-DENORM-LABEL: v_test_canonicalize_minimum:
+; VI-DENORM:       ; %bb.0:
+; VI-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-DENORM-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; VI-DENORM-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-DENORM-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; VI-DENORM-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; VI-DENORM-NEXT:    v_min_f32_e32 v0, v0, v1
+; VI-DENORM-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_test_canonicalize_minimum:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimum.f32(float %a, float %b)
   %canonicalized = call float @llvm.canonicalize.f32(float %min)
   ret float %canonicalized
@@ -878,6 +2217,36 @@ define float @v_test_canonicalize_minimum(float %a, float %b) {
 ; VI-FLUSH-NEXT: v_mul_f32_e32 v0, 1.0, v0
 ; GCN-NEXT: s_setpc_b64
 define float @v_test_canonicalize_maximum(float %a, float %b) {
+; VI-FLUSH-LABEL: v_test_canonicalize_maximum:
+; VI-FLUSH:       ; %bb.0:
+; VI-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-FLUSH-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; VI-FLUSH-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-FLUSH-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; VI-FLUSH-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; VI-FLUSH-NEXT:    v_max_f32_e32 v0, v0, v1
+; VI-FLUSH-NEXT:    v_mul_f32_e32 v0, 1.0, v0
+; VI-FLUSH-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-DENORM-LABEL: v_test_canonicalize_maximum:
+; VI-DENORM:       ; %bb.0:
+; VI-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-DENORM-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; VI-DENORM-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-DENORM-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; VI-DENORM-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; VI-DENORM-NEXT:    v_max_f32_e32 v0, v0, v1
+; VI-DENORM-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_test_canonicalize_maximum:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.maximum.f32(float %a, float %b)
   %canonicalized = call float @llvm.canonicalize.f32(float %min)
   ret float %canonicalized
@@ -894,6 +2263,21 @@ define float @v_test_canonicalize_maximum(float %a, float %b) {
 ; GCN-NEXT: v_min_f32_e32 v0, v0, v1
 ; GCN-NEXT: s_setpc_b64
 define float @v_test_canonicalize_minimumnum(float %a, float %b) {
+; VI-LABEL: v_test_canonicalize_minimumnum:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_mul_f32_e32 v1, 1.0, v1
+; VI-NEXT:    v_mul_f32_e32 v0, 1.0, v0
+; VI-NEXT:    v_min_f32_e32 v0, v0, v1
+; VI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_test_canonicalize_minimumnum:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimumnum.f32(float %a, float %b)
   %canonicalized = call float @llvm.canonicalize.f32(float %min)
   ret float %canonicalized
@@ -910,6 +2294,21 @@ define float @v_test_canonicalize_minimumnum(float %a, float %b) {
 ; GCN-NEXT: v_max_f32_e32 v0, v0, v1
 ; GCN-NEXT: s_setpc_b64
 define float @v_test_canonicalize_maximumnum(float %a, float %b) {
+; VI-LABEL: v_test_canonicalize_maximumnum:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_mul_f32_e32 v1, 1.0, v1
+; VI-NEXT:    v_mul_f32_e32 v0, 1.0, v0
+; VI-NEXT:    v_max_f32_e32 v0, v0, v1
+; VI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_test_canonicalize_maximumnum:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.maximumnum.f32(float %a, float %b)
   %canonicalized = call float @llvm.canonicalize.f32(float %min)
   ret float %canonicalized
@@ -949,3 +2348,6 @@ declare float @llvm.amdgcn.exp2.f32(float) #0
 attributes #0 = { nounwind readnone }
 attributes #1 = { "no-nans-fp-math"="true" }
 attributes #2 = { "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN-DENORM: {{.*}}
+; GCN-FLUSH: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum.ll b/llvm/test/CodeGen/AMDGPU/fmaximum.ll
index 0283b5ff5d439..7963b04392f78 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum.ll
@@ -7,13 +7,22 @@
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-FAKE16 %s
 
 define amdgpu_ps float @test_fmaximum_f32_vv(float %a, float %b) {
-; GFX9-LABEL: test_fmaximum_f32_vv:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fmaximum_f32_vv:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fmaximum_f32_vv:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v0, v1
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: test_fmaximum_f32_vv:
 ; GFX12:       ; %bb.0:
@@ -24,14 +33,25 @@ define amdgpu_ps float @test_fmaximum_f32_vv(float %a, float %b) {
 }
 
 define amdgpu_ps float @test_fmaximum_f32_ss(float inreg %a, float inreg %b) {
-; GFX9-LABEL: test_fmaximum_f32_ss:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b32_e32 v0, s1
-; GFX9-NEXT:    v_max_f32_e32 v1, s0, v0
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fmaximum_f32_ss:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e64 vcc, s1, s1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fmaximum_f32_ss:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, s0, v0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: test_fmaximum_f32_ss:
 ; GFX12:       ; %bb.0:
@@ -44,13 +64,23 @@ define amdgpu_ps float @test_fmaximum_f32_ss(float inreg %a, float inreg %b) {
 }
 
 define amdgpu_ps float @test_fmaximum_f32_vs(float %a, float inreg %b) {
-; GFX9-LABEL: test_fmaximum_f32_vs:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_max_f32_e32 v1, s0, v0
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fmaximum_f32_vs:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s0
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e64 vcc, s0, s0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fmaximum_f32_vs:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, s0, v0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: test_fmaximum_f32_vs:
 ; GFX12:       ; %bb.0:
@@ -75,13 +105,22 @@ define amdgpu_ps float @test_fmaximum_nnan_f32(float %a, float %b) {
 }
 
 define amdgpu_ps float @test_fmaximum_nsz_f32(float %a, float %b) {
-; GFX9-LABEL: test_fmaximum_nsz_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fmaximum_nsz_f32:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fmaximum_nsz_f32:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v0, v1
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: test_fmaximum_nsz_f32:
 ; GFX12:       ; %bb.0:
@@ -106,16 +145,30 @@ define amdgpu_ps float @test_fmaximum_signed_zero_f32() {
 }
 
 define amdgpu_ps <2 x float> @test_fmaximum_v2f32(<2 x float> %a, <2 x float> %b) {
-; GFX9-LABEL: test_fmaximum_v2f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_max_f32_e32 v4, v0, v2
-; GFX9-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT:    v_max_f32_e32 v2, v1, v3
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fmaximum_v2f32:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fmaximum_v2f32:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v4, v0, v2
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v1, v3
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: test_fmaximum_v2f32:
 ; GFX12:       ; %bb.0:
@@ -127,18 +180,36 @@ define amdgpu_ps <2 x float> @test_fmaximum_v2f32(<2 x float> %a, <2 x float> %b
 }
 
 define amdgpu_ps <2 x float> @test_fmaximum_v2f32_ss(<2 x float> inreg %a, <2 x float> inreg %b) {
-; GFX9-LABEL: test_fmaximum_v2f32_ss:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9-NEXT:    v_max_f32_e32 v1, s0, v0
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9-NEXT:    v_max_f32_e32 v3, s1, v1
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, s1, v1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fmaximum_v2f32_ss:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s2
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e64 vcc, s2, s2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s3
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e64 vcc, s3, s3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v1, vcc
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fmaximum_v2f32_ss:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, s0, v0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v3, s1, v1
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, s1, v1
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: test_fmaximum_v2f32_ss:
 ; GFX12:       ; %bb.0:
@@ -152,19 +223,38 @@ define amdgpu_ps <2 x float> @test_fmaximum_v2f32_ss(<2 x float> inreg %a, <2 x
 }
 
 define amdgpu_ps <3 x float> @test_fmaximum_v3f32(<3 x float> %a, <3 x float> %b) {
-; GFX9-LABEL: test_fmaximum_v3f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_max_f32_e32 v6, v0, v3
-; GFX9-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT:    v_max_f32_e32 v3, v1, v4
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX9-NEXT:    v_max_f32_e32 v3, v2, v5
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fmaximum_v3f32:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v0, v0, v3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v1, v1, v3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v2, v2, v3
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fmaximum_v3f32:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v6, v0, v3
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v3, v1, v4
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v3, v2, v5
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: test_fmaximum_v3f32:
 ; GFX12:       ; %bb.0:
@@ -177,22 +267,46 @@ define amdgpu_ps <3 x float> @test_fmaximum_v3f32(<3 x float> %a, <3 x float> %b
 }
 
 define amdgpu_ps <4 x float> @test_fmaximum_v4f32(<4 x float> %a, <4 x float> %b) {
-; GFX9-LABEL: test_fmaximum_v4f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_max_f32_e32 v8, v0, v4
-; GFX9-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX9-NEXT:    v_max_f32_e32 v4, v1, v5
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX9-NEXT:    v_max_f32_e32 v4, v2, v6
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
-; GFX9-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX9-NEXT:    v_max_f32_e32 v4, v3, v7
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fmaximum_v4f32:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v0, v0, v4
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v1, v1, v4
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v3, v3, v4
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fmaximum_v4f32:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v8, v0, v4
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v4, v1, v5
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v4, v2, v6
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v4, v3, v7
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: test_fmaximum_v4f32:
 ; GFX12:       ; %bb.0:
@@ -206,58 +320,142 @@ define amdgpu_ps <4 x float> @test_fmaximum_v4f32(<4 x float> %a, <4 x float> %b
 }
 
 define amdgpu_ps <16 x float> @test_fmaximum_v16f32(<16 x float> %a, <16 x float> %b) {
-; GFX9-LABEL: test_fmaximum_v16f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_max_f32_e32 v32, v1, v17
-; GFX9-NEXT:    v_mov_b32_e32 v33, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v1, v17
-; GFX9-NEXT:    v_max_f32_e32 v1, v0, v16
-; GFX9-NEXT:    v_cmp_o_f32_e64 s[12:13], v0, v16
-; GFX9-NEXT:    v_max_f32_e32 v17, v2, v18
-; GFX9-NEXT:    v_cmp_o_f32_e64 s[0:1], v2, v18
-; GFX9-NEXT:    v_max_f32_e32 v18, v3, v19
-; GFX9-NEXT:    v_cmp_o_f32_e64 s[2:3], v3, v19
-; GFX9-NEXT:    v_max_f32_e32 v19, v4, v20
-; GFX9-NEXT:    v_cmp_o_f32_e64 s[4:5], v4, v20
-; GFX9-NEXT:    v_max_f32_e32 v20, v5, v21
-; GFX9-NEXT:    v_cmp_o_f32_e64 s[6:7], v5, v21
-; GFX9-NEXT:    v_max_f32_e32 v21, v6, v22
-; GFX9-NEXT:    v_cmp_o_f32_e64 s[8:9], v6, v22
-; GFX9-NEXT:    v_max_f32_e32 v22, v7, v23
-; GFX9-NEXT:    v_cmp_o_f32_e64 s[10:11], v7, v23
-; GFX9-NEXT:    v_max_f32_e32 v23, v8, v24
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v33, v1, s[12:13]
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v33, v32, vcc
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v8, v24
-; GFX9-NEXT:    v_max_f32_e32 v34, v9, v25
-; GFX9-NEXT:    v_cndmask_b32_e32 v8, v33, v23, vcc
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v9, v25
-; GFX9-NEXT:    v_max_f32_e32 v35, v10, v26
-; GFX9-NEXT:    v_cndmask_b32_e32 v9, v33, v34, vcc
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v10, v26
-; GFX9-NEXT:    v_max_f32_e32 v36, v11, v27
-; GFX9-NEXT:    v_cndmask_b32_e32 v10, v33, v35, vcc
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v11, v27
-; GFX9-NEXT:    v_max_f32_e32 v37, v12, v28
-; GFX9-NEXT:    v_cndmask_b32_e32 v11, v33, v36, vcc
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v12, v28
-; GFX9-NEXT:    v_max_f32_e32 v16, v13, v29
-; GFX9-NEXT:    v_cndmask_b32_e32 v12, v33, v37, vcc
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v13, v29
-; GFX9-NEXT:    v_cndmask_b32_e32 v13, v33, v16, vcc
-; GFX9-NEXT:    v_max_f32_e32 v16, v14, v30
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v14, v30
-; GFX9-NEXT:    v_cndmask_b32_e32 v14, v33, v16, vcc
-; GFX9-NEXT:    v_max_f32_e32 v16, v15, v31
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v15, v31
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, v33, v17, s[0:1]
-; GFX9-NEXT:    v_cndmask_b32_e64 v3, v33, v18, s[2:3]
-; GFX9-NEXT:    v_cndmask_b32_e64 v4, v33, v19, s[4:5]
-; GFX9-NEXT:    v_cndmask_b32_e64 v5, v33, v20, s[6:7]
-; GFX9-NEXT:    v_cndmask_b32_e64 v6, v33, v21, s[8:9]
-; GFX9-NEXT:    v_cndmask_b32_e64 v7, v33, v22, s[10:11]
-; GFX9-NEXT:    v_cndmask_b32_e32 v15, v33, v16, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fmaximum_v16f32:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v16, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v16, v0, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v17, v17
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v0, v0, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v17, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v18, v18
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v18, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v1, v1, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v18, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v19, v19
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v19, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v2, v2, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v19, v3, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v20, v20
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v4, v20, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v3, v3, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v20, v4, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v21, v21
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v5, v5, v21, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v4, v4, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v21, v5, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v22, v22
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v6, v6, v22, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v5, v5, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v22, v6, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v23, v23
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v7, v7, v23, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v6, v6, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v23, v7, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v24, v24
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v8, v8, v24, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v7, v7, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v24, v8, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v25, v25
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v9, v9, v25, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v8, v8, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v25, v9, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v10, v10, v26, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v9, v9, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v26, v10, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v27, v27
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v11, v11, v27, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v10, v10, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v27, v11, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v28, v28
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v12, v12, v28, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v11, v11, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v28, v12, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v29, v29
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v13, v13, v29, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v12, v12, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v29, v13, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v30, v30
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v14, v14, v30, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v13, v13, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v30, v14, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v31, v31
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v15, v15, v31, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v14, v14, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v31, v15, vcc
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v15, v15, v16
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fmaximum_v16f32:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v32, v1, v17
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v33, 0x7fc00000
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v1, v17
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v0, v16
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e64 s[12:13], v0, v16
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v17, v2, v18
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e64 s[0:1], v2, v18
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v18, v3, v19
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e64 s[2:3], v3, v19
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v19, v4, v20
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e64 s[4:5], v4, v20
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v20, v5, v21
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e64 s[6:7], v5, v21
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v21, v6, v22
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e64 s[8:9], v6, v22
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v22, v7, v23
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e64 s[10:11], v7, v23
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v23, v8, v24
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v0, v33, v1, s[12:13]
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v1, v33, v32, vcc
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v8, v24
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v34, v9, v25
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v8, v33, v23, vcc
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v9, v25
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v35, v10, v26
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v9, v33, v34, vcc
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v10, v26
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v36, v11, v27
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v10, v33, v35, vcc
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v11, v27
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v37, v12, v28
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v11, v33, v36, vcc
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v12, v28
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v16, v13, v29
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v12, v33, v37, vcc
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v13, v29
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v13, v33, v16, vcc
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v16, v14, v30
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v14, v30
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v14, v33, v16, vcc
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v16, v15, v31
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v15, v31
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v2, v33, v17, s[0:1]
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v3, v33, v18, s[2:3]
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v4, v33, v19, s[4:5]
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v5, v33, v20, s[6:7]
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v6, v33, v21, s[8:9]
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v7, v33, v22, s[10:11]
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v15, v33, v16, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: test_fmaximum_v16f32:
 ; GFX12:       ; %bb.0:
@@ -283,13 +481,22 @@ define amdgpu_ps <16 x float> @test_fmaximum_v16f32(<16 x float> %a, <16 x float
 }
 
 define amdgpu_ps half @test_fmaximum_f16_vv(half %a, half %b) {
-; GFX9-LABEL: test_fmaximum_f16_vv:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fmaximum_f16_vv:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-SDAG-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fmaximum_f16_vv:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_max_f16_e32 v2, v0, v1
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7e00
+; GFX9-GISEL-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-SDAG-TRUE16-LABEL: test_fmaximum_f16_vv:
 ; GFX12-SDAG-TRUE16:       ; %bb.0:
@@ -315,14 +522,25 @@ define amdgpu_ps half @test_fmaximum_f16_vv(half %a, half %b) {
 }
 
 define amdgpu_ps half @test_fmaximum_f16_ss(half inreg %a, half inreg %b) {
-; GFX9-LABEL: test_fmaximum_f16_ss:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b32_e32 v0, s1
-; GFX9-NEXT:    v_max_f16_e32 v1, s0, v0
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, s0, v0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fmaximum_f16_ss:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e64 vcc, s1, s1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-SDAG-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fmaximum_f16_ss:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s1
+; GFX9-GISEL-NEXT:    v_max_f16_e32 v1, s0, v0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7e00
+; GFX9-GISEL-NEXT:    v_cmp_o_f16_e32 vcc, s0, v0
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: test_fmaximum_f16_ss:
 ; GFX12:       ; %bb.0:
@@ -337,14 +555,19 @@ define amdgpu_ps half @test_fmaximum_f16_ss(half inreg %a, half inreg %b) {
 define amdgpu_ps <2 x half> @test_fmaximum_v2f16_vv(<2 x half> %a, <2 x half> %b) {
 ; GFX9-SDAG-LABEL: test_fmaximum_v2f16_vv:
 ; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    v_pk_max_f16 v2, v0, v1
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-SDAG-NEXT:    v_cndmask_b32_sdwa v0, v3, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-SDAG-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_sdwa v3, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
 ; GFX9-SDAG-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-SDAG-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX9-SDAG-NEXT:    v_perm_b32 v1, v2, v1, s0
+; GFX9-SDAG-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX9-SDAG-NEXT:    v_pk_max_f16 v0, v0, v1
 ; GFX9-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-GISEL-LABEL: test_fmaximum_v2f16_vv:
@@ -370,19 +593,25 @@ define amdgpu_ps <2 x half> @test_fmaximum_v2f16_vv(<2 x half> %a, <2 x half> %b
 define amdgpu_ps <2 x half> @test_fmaximum_v2f16_ss(<2 x half> inreg %a, <2 x half> inreg %b) {
 ; GFX9-SDAG-LABEL: test_fmaximum_v2f16_ss:
 ; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s1
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
-; GFX9-SDAG-NEXT:    s_lshr_b32 s1, s1, 16
-; GFX9-SDAG-NEXT:    v_pk_max_f16 v1, s0, v1
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, s0, v0
-; GFX9-SDAG-NEXT:    s_lshr_b32 s0, s0, 16
+; GFX9-SDAG-NEXT:    s_lshr_b32 s2, s0, 16
+; GFX9-SDAG-NEXT:    s_lshr_b32 s3, s1, 16
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e64 vcc, s3, s3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, s1
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, s0, v3
-; GFX9-SDAG-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX9-SDAG-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e64 vcc, s1, s1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v2, vcc
+; GFX9-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX9-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX9-SDAG-NEXT:    v_lshl_or_b32 v1, v1, 16, v3
+; GFX9-SDAG-NEXT:    v_lshl_or_b32 v0, v0, 16, v2
+; GFX9-SDAG-NEXT:    v_pk_max_f16 v0, v0, v1
 ; GFX9-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-GISEL-LABEL: test_fmaximum_v2f16_ss:
@@ -412,17 +641,32 @@ define amdgpu_ps <2 x half> @test_fmaximum_v2f16_ss(<2 x half> inreg %a, <2 x ha
 define amdgpu_ps <3 x half> @test_fmaximum_v3f16_vv(<3 x half> %a, <3 x half> %b) {
 ; GFX9-SDAG-LABEL: test_fmaximum_v3f16_vv:
 ; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-SDAG-NEXT:    v_pk_max_f16 v3, v0, v2
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-SDAG-NEXT:    v_cndmask_b32_sdwa v0, v5, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-SDAG-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX9-SDAG-NEXT:    v_cndmask_b32_sdwa v5, v0, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
 ; GFX9-SDAG-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-SDAG-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX9-SDAG-NEXT:    v_perm_b32 v2, v4, v2, s0
+; GFX9-SDAG-NEXT:    v_perm_b32 v0, v5, v0, s0
+; GFX9-SDAG-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX9-SDAG-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX9-SDAG-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-SDAG-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX9-SDAG-NEXT:    v_perm_b32 v1, v2, v1, s0
+; GFX9-SDAG-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX9-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-GISEL-LABEL: test_fmaximum_v3f16_vv:
@@ -464,24 +708,44 @@ define amdgpu_ps <3 x half> @test_fmaximum_v3f16_vv(<3 x half> %a, <3 x half> %b
 define amdgpu_ps <3 x half> @test_fmaximum_v3f16_ss(<3 x half> inreg %a, <3 x half> inreg %b) {
 ; GFX9-SDAG-LABEL: test_fmaximum_v3f16_ss:
 ; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s3
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9-SDAG-NEXT:    v_pk_max_f16 v1, s1, v1
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, s1, v0
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-SDAG-NEXT:    s_lshr_b32 s4, s0, 16
+; GFX9-SDAG-NEXT:    s_lshr_b32 s5, s2, 16
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s4
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s5
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e64 vcc, s5, s5
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, s2
-; GFX9-SDAG-NEXT:    s_lshr_b32 s1, s2, 16
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-SDAG-NEXT:    v_pk_max_f16 v3, s0, v3
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, s0, v0
-; GFX9-SDAG-NEXT:    s_lshr_b32 s0, s0, 16
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v4, s1
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, s0, v4
-; GFX9-SDAG-NEXT:    v_cndmask_b32_sdwa v2, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX9-SDAG-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e64 vcc, s2, s2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v2, vcc
+; GFX9-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX9-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX9-SDAG-NEXT:    v_lshl_or_b32 v1, v1, 16, v3
+; GFX9-SDAG-NEXT:    v_lshl_or_b32 v0, v0, 16, v2
+; GFX9-SDAG-NEXT:    s_lshr_b32 s0, s1, 16
+; GFX9-SDAG-NEXT:    s_lshr_b32 s2, s3, 16
+; GFX9-SDAG-NEXT:    v_pk_max_f16 v0, v0, v1
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s0
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s2
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e64 vcc, s2, s2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v1, vcc
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, s1
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v4, s3
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e64 vcc, s3, s3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v4, v3, vcc
+; GFX9-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v4
+; GFX9-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX9-SDAG-NEXT:    v_lshl_or_b32 v2, v2, 16, v4
+; GFX9-SDAG-NEXT:    v_lshl_or_b32 v1, v1, 16, v3
+; GFX9-SDAG-NEXT:    v_pk_max_f16 v1, v1, v2
 ; GFX9-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-GISEL-LABEL: test_fmaximum_v3f16_ss:
@@ -526,20 +790,32 @@ define amdgpu_ps <3 x half> @test_fmaximum_v3f16_ss(<3 x half> inreg %a, <3 x ha
 define amdgpu_ps <4 x half> @test_fmaximum_v4f16(<4 x half> %a, <4 x half> %b) {
 ; GFX9-SDAG-LABEL: test_fmaximum_v4f16:
 ; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-SDAG-NEXT:    v_cndmask_b32_sdwa v1, v5, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-SDAG-NEXT:    v_pk_max_f16 v3, v0, v2
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-SDAG-NEXT:    v_cndmask_b32_sdwa v0, v5, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-SDAG-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX9-SDAG-NEXT:    v_cndmask_b32_sdwa v5, v0, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
 ; GFX9-SDAG-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-SDAG-NEXT:    v_perm_b32 v0, v0, v4, s0
-; GFX9-SDAG-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX9-SDAG-NEXT:    v_perm_b32 v2, v4, v2, s0
+; GFX9-SDAG-NEXT:    v_perm_b32 v0, v5, v0, s0
+; GFX9-SDAG-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX9-SDAG-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX9-SDAG-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-SDAG-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX9-SDAG-NEXT:    v_perm_b32 v1, v2, v1, s0
+; GFX9-SDAG-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX9-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-GISEL-LABEL: test_fmaximum_v4f16:
@@ -574,32 +850,44 @@ define amdgpu_ps <4 x half> @test_fmaximum_v4f16(<4 x half> %a, <4 x half> %b) {
 define amdgpu_ps <4 x half> @test_fmaximum_v4f16_ss(<4 x half> inreg %a, <4 x half> inreg %b) {
 ; GFX9-SDAG-LABEL: test_fmaximum_v4f16_ss:
 ; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s3
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9-SDAG-NEXT:    s_lshr_b32 s3, s3, 16
-; GFX9-SDAG-NEXT:    v_pk_max_f16 v1, s1, v1
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, s1, v0
-; GFX9-SDAG-NEXT:    s_lshr_b32 s1, s1, 16
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s3
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v2, v1, vcc
-; GFX9-SDAG-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, s1, v0
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v4, s2
-; GFX9-SDAG-NEXT:    s_lshr_b32 s1, s2, 16
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-SDAG-NEXT:    v_pk_max_f16 v4, s0, v4
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, s0, v0
-; GFX9-SDAG-NEXT:    s_lshr_b32 s0, s0, 16
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v5, s1
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, s0, v5
-; GFX9-SDAG-NEXT:    v_cndmask_b32_sdwa v2, v2, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX9-SDAG-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
-; GFX9-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v3
-; GFX9-SDAG-NEXT:    v_lshl_or_b32 v1, v1, 16, v2
+; GFX9-SDAG-NEXT:    s_lshr_b32 s4, s0, 16
+; GFX9-SDAG-NEXT:    s_lshr_b32 s5, s2, 16
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s4
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s5
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e64 vcc, s5, s5
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, s2
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e64 vcc, s2, s2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v2, vcc
+; GFX9-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX9-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX9-SDAG-NEXT:    v_lshl_or_b32 v1, v1, 16, v3
+; GFX9-SDAG-NEXT:    v_lshl_or_b32 v0, v0, 16, v2
+; GFX9-SDAG-NEXT:    s_lshr_b32 s0, s1, 16
+; GFX9-SDAG-NEXT:    s_lshr_b32 s2, s3, 16
+; GFX9-SDAG-NEXT:    v_pk_max_f16 v0, v0, v1
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s0
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s2
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e64 vcc, s2, s2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v1, vcc
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, s1
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v4, s3
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e64 vcc, s3, s3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v4, v3, vcc
+; GFX9-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v4
+; GFX9-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX9-SDAG-NEXT:    v_lshl_or_b32 v2, v2, 16, v4
+; GFX9-SDAG-NEXT:    v_lshl_or_b32 v1, v1, 16, v3
+; GFX9-SDAG-NEXT:    v_pk_max_f16 v1, v1, v2
 ; GFX9-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-GISEL-LABEL: test_fmaximum_v4f16_ss:
@@ -642,11 +930,13 @@ define amdgpu_ps <4 x half> @test_fmaximum_v4f16_ss(<4 x half> inreg %a, <4 x ha
 define amdgpu_ps <2 x float> @test_fmaximum_f64_vv(double %a, double %b) {
 ; GFX9-SDAG-LABEL: test_fmaximum_f64_vv:
 ; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-SDAG-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-SDAG-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-GISEL-LABEL: test_fmaximum_f64_vv:
@@ -670,13 +960,17 @@ define amdgpu_ps <2 x float> @test_fmaximum_f64_vv(double %a, double %b) {
 define amdgpu_ps <2 x float> @test_fmaximum_f64_ss(double inreg %a, double inreg %b) {
 ; GFX9-SDAG-LABEL: test_fmaximum_f64_ss:
 ; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[4:5], s[2:3], s[2:3]
+; GFX9-SDAG-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s1, s3, s1
+; GFX9-SDAG-NEXT:    s_cselect_b32 s0, s2, s0
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[4:5], s[0:1], s[0:1]
+; GFX9-SDAG-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s3, s1, s3
+; GFX9-SDAG-NEXT:    s_cselect_b32 s2, s0, s2
 ; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9-SDAG-NEXT:    v_max_f64 v[2:3], s[0:1], v[0:1]
-; GFX9-SDAG-NEXT:    v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX9-SDAG-NEXT:    v_max_f64 v[0:1], s[0:1], v[0:1]
 ; GFX9-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-GISEL-LABEL: test_fmaximum_f64_ss:
@@ -702,19 +996,28 @@ define amdgpu_ps <2 x float> @test_fmaximum_f64_ss(double inreg %a, double inreg
 define amdgpu_ps <4 x float> @test_fmaximum_v2f64_ss(<2 x double> inreg %a, <2 x double> inreg %b) {
 ; GFX9-SDAG-LABEL: test_fmaximum_v2f64_ss:
 ; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s4
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s5
-; GFX9-SDAG-NEXT:    v_max_f64 v[2:3], s[0:1], v[0:1]
-; GFX9-SDAG-NEXT:    v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s6
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s7
-; GFX9-SDAG-NEXT:    v_max_f64 v[4:5], s[2:3], v[0:1]
-; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[0:1], s[2:3], v[0:1]
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[0:1]
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v3, v5, v6, s[0:1]
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[8:9], s[4:5], s[4:5]
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[10:11], s[6:7], s[6:7]
+; GFX9-SDAG-NEXT:    s_and_b64 s[8:9], s[8:9], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s1, s5, s1
+; GFX9-SDAG-NEXT:    s_cselect_b32 s0, s4, s0
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[8:9], s[0:1], s[0:1]
+; GFX9-SDAG-NEXT:    s_and_b64 s[8:9], s[8:9], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s8, s1, s5
+; GFX9-SDAG-NEXT:    s_cselect_b32 s9, s0, s4
+; GFX9-SDAG-NEXT:    s_and_b64 s[4:5], s[10:11], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s3, s7, s3
+; GFX9-SDAG-NEXT:    s_cselect_b32 s2, s6, s2
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[4:5], s[2:3], s[2:3]
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s9
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s8
+; GFX9-SDAG-NEXT:    v_max_f64 v[0:1], s[0:1], v[0:1]
+; GFX9-SDAG-NEXT:    s_and_b64 s[0:1], s[4:5], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s0, s3, s7
+; GFX9-SDAG-NEXT:    s_cselect_b32 s1, s2, s6
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s1
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, s0
+; GFX9-SDAG-NEXT:    v_max_f64 v[2:3], s[2:3], v[2:3]
 ; GFX9-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-GISEL-LABEL: test_fmaximum_v2f64_ss:
@@ -747,23 +1050,34 @@ define amdgpu_ps <4 x float> @test_fmaximum_v2f64_ss(<2 x double> inreg %a, <2 x
 define amdgpu_ps <8 x float> @test_fmaximum_v4f64(<4 x double> %a, <4 x double> %b) {
 ; GFX9-SDAG-LABEL: test_fmaximum_v4f64:
 ; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX9-SDAG-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-SDAG-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[0:1], v[2:3], v[10:11]
-; GFX9-SDAG-NEXT:    v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[2:3], v[4:5], v[12:13]
-; GFX9-SDAG-NEXT:    v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[4:5], v[6:7], v[14:15]
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[0:1]
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[0:1]
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[2:3]
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[2:3]
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[4:5]
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[4:5]
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[0:1], v[10:11], v[10:11]
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[2:3], v[12:13], v[12:13]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s[0:1]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s[0:1]
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[0:1], v[2:3], v[2:3]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s[2:3]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v4, v4, v12, s[2:3]
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[2:3], v[4:5], v[4:5]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX9-SDAG-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v9, v11, v3, s[0:1]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v8, v10, v2, s[0:1]
+; GFX9-SDAG-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v9, v13, v5, s[2:3]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v8, v12, v4, s[2:3]
+; GFX9-SDAG-NEXT:    v_max_f64 v[4:5], v[4:5], v[8:9]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v9, v15, v7, vcc
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX9-SDAG-NEXT:    v_max_f64 v[6:7], v[6:7], v[8:9]
 ; GFX9-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-GISEL-LABEL: test_fmaximum_v4f64:
@@ -802,31 +1116,50 @@ define amdgpu_ps <8 x float> @test_fmaximum_v4f64(<4 x double> %a, <4 x double>
 define amdgpu_ps <8 x float> @test_fmaximum_v4f64_ss(<4 x double> inreg %a, <4 x double> inreg %b) {
 ; GFX9-SDAG-LABEL: test_fmaximum_v4f64_ss:
 ; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s8
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s9
-; GFX9-SDAG-NEXT:    v_max_f64 v[2:3], s[0:1], v[0:1]
-; GFX9-SDAG-NEXT:    v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v10, 0x7ff80000
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s10
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s11
-; GFX9-SDAG-NEXT:    v_max_f64 v[4:5], s[2:3], v[1:2]
-; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[0:1], s[2:3], v[1:2]
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s12
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s13
-; GFX9-SDAG-NEXT:    v_max_f64 v[6:7], s[4:5], v[1:2]
-; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[2:3], s[4:5], v[1:2]
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s14
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s15
-; GFX9-SDAG-NEXT:    v_max_f64 v[8:9], s[6:7], v[1:2]
-; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[4:5], s[6:7], v[1:2]
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[0:1]
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v3, v5, v10, s[0:1]
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v4, v6, 0, s[2:3]
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v5, v7, v10, s[2:3]
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v6, v8, 0, s[4:5]
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v7, v9, v10, s[4:5]
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[16:17], s[8:9], s[8:9]
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[18:19], s[10:11], s[10:11]
+; GFX9-SDAG-NEXT:    s_and_b64 s[16:17], s[16:17], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s1, s9, s1
+; GFX9-SDAG-NEXT:    s_cselect_b32 s0, s8, s0
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[16:17], s[0:1], s[0:1]
+; GFX9-SDAG-NEXT:    s_and_b64 s[16:17], s[16:17], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s20, s1, s9
+; GFX9-SDAG-NEXT:    s_cselect_b32 s21, s0, s8
+; GFX9-SDAG-NEXT:    s_and_b64 s[8:9], s[18:19], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s3, s11, s3
+; GFX9-SDAG-NEXT:    s_cselect_b32 s2, s10, s2
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[8:9], s[2:3], s[2:3]
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[16:17], s[12:13], s[12:13]
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s21
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s20
+; GFX9-SDAG-NEXT:    v_max_f64 v[0:1], s[0:1], v[0:1]
+; GFX9-SDAG-NEXT:    s_and_b64 s[8:9], s[8:9], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s18, s3, s11
+; GFX9-SDAG-NEXT:    s_cselect_b32 s19, s2, s10
+; GFX9-SDAG-NEXT:    s_and_b64 s[8:9], s[16:17], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s5, s13, s5
+; GFX9-SDAG-NEXT:    s_cselect_b32 s4, s12, s4
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[8:9], s[4:5], s[4:5]
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[10:11], s[14:15], s[14:15]
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s19
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, s18
+; GFX9-SDAG-NEXT:    v_max_f64 v[2:3], s[2:3], v[2:3]
+; GFX9-SDAG-NEXT:    s_and_b64 s[8:9], s[8:9], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s13, s5, s13
+; GFX9-SDAG-NEXT:    s_cselect_b32 s12, s4, s12
+; GFX9-SDAG-NEXT:    s_and_b64 s[8:9], s[10:11], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s7, s15, s7
+; GFX9-SDAG-NEXT:    s_cselect_b32 s6, s14, s6
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[8:9], s[6:7], s[6:7]
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v4, s12
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v5, s13
+; GFX9-SDAG-NEXT:    v_max_f64 v[4:5], s[4:5], v[4:5]
+; GFX9-SDAG-NEXT:    s_and_b64 s[0:1], s[8:9], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s0, s7, s15
+; GFX9-SDAG-NEXT:    s_cselect_b32 s1, s6, s14
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v6, s1
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v7, s0
+; GFX9-SDAG-NEXT:    v_max_f64 v[6:7], s[6:7], v[6:7]
 ; GFX9-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-GISEL-LABEL: test_fmaximum_v4f64_ss:
@@ -871,22 +1204,40 @@ define amdgpu_ps <8 x float> @test_fmaximum_v4f64_ss(<4 x double> inreg %a, <4 x
 }
 
 define amdgpu_kernel void @fmaximumi_f32_move_to_valu(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) {
-; GFX9-LABEL: fmaximumi_f32_move_to_valu:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[2:3] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[6:7] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_max_f32_e32 v4, v1, v2
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v1, v2
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: fmaximumi_f32_move_to_valu:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[2:3] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[6:7] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v1, vcc
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-SDAG-NEXT:    s_endpgm
+;
+; GFX9-GISEL-LABEL: fmaximumi_f32_move_to_valu:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[2:3] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[6:7] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v4, v1, v2
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v1, v2
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-GISEL-NEXT:    s_endpgm
 ;
 ; GFX12-LABEL: fmaximumi_f32_move_to_valu:
 ; GFX12:       ; %bb.0:
@@ -910,22 +1261,40 @@ define amdgpu_kernel void @fmaximumi_f32_move_to_valu(ptr addrspace(1) %out, ptr
 }
 
 define amdgpu_kernel void @fmaximum_f16_move_to_valu(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) {
-; GFX9-LABEL: fmaximum_f16_move_to_valu:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_ushort v1, v0, s[2:3] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_ushort v2, v0, s[6:7] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_max_f16_e32 v4, v1, v2
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v2
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT:    global_store_short v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: fmaximum_f16_move_to_valu:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_ushort v1, v0, s[2:3] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_ushort v2, v0, s[6:7] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v1, vcc
+; GFX9-SDAG-NEXT:    v_max_f16_e32 v1, v1, v2
+; GFX9-SDAG-NEXT:    global_store_short v0, v1, s[0:1]
+; GFX9-SDAG-NEXT:    s_endpgm
+;
+; GFX9-GISEL-LABEL: fmaximum_f16_move_to_valu:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7e00
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_ushort v1, v0, s[2:3] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_ushort v2, v0, s[6:7] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f16_e32 v4, v1, v2
+; GFX9-GISEL-NEXT:    v_cmp_o_f16_e32 vcc, v1, v2
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-GISEL-NEXT:    global_store_short v0, v1, s[0:1]
+; GFX9-GISEL-NEXT:    s_endpgm
 ;
 ; GFX12-SDAG-TRUE16-LABEL: fmaximum_f16_move_to_valu:
 ; GFX12-SDAG-TRUE16:       ; %bb.0:
@@ -994,13 +1363,22 @@ define amdgpu_kernel void @fmaximum_f16_move_to_valu(ptr addrspace(1) %out, ptr
 }
 
 define amdgpu_ps float @test_fmaximum_f32_ieee_on(float %a, float %b) #0 {
-; GFX9-LABEL: test_fmaximum_f32_ieee_on:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fmaximum_f32_ieee_on:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fmaximum_f32_ieee_on:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v0, v1
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: test_fmaximum_f32_ieee_on:
 ; GFX12:       ; %bb.0:
@@ -1011,13 +1389,22 @@ define amdgpu_ps float @test_fmaximum_f32_ieee_on(float %a, float %b) #0 {
 }
 
 define amdgpu_ps float @test_fmaximum_f32_ieee_off(float %a, float %b) #1 {
-; GFX9-LABEL: test_fmaximum_f32_ieee_off:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fmaximum_f32_ieee_off:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fmaximum_f32_ieee_off:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v0, v1
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: test_fmaximum_f32_ieee_off:
 ; GFX12:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
index 91f95859ed3db..629b0cded374d 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
@@ -18,15 +18,20 @@ define float @v_fmaximum3_f32(float %a, float %b, float %c) {
 ; GFX942-LABEL: v_fmaximum3_f32:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e32 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32:
@@ -53,15 +58,20 @@ define float @v_fmaximum3_f32_commute(float %a, float %b, float %c) {
 ; GFX942-LABEL: v_fmaximum3_f32_commute:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e32 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_max_f32_e32 v1, v2, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v1, v0
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_commute:
@@ -86,16 +96,23 @@ define amdgpu_ps i32 @s_fmaximum3_f32(float inreg %a, float inreg %b, float inre
 ;
 ; GFX942-LABEL: s_fmaximum3_f32:
 ; GFX942:       ; %bb.0:
-; GFX942-NEXT:    v_mov_b32_e32 v0, s1
-; GFX942-NEXT:    v_max_f32_e32 v1, s0, v0
-; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v0, s0
+; GFX942-NEXT:    v_mov_b32_e32 v1, s1
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, s1, s1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v1, s2
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, s2, s2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX942-NEXT:    v_max_f32_e32 v1, s2, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, s2, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_nop 0
 ; GFX942-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX942-NEXT:    ; return to shader part epilog
@@ -129,15 +146,20 @@ define float @v_fmaximum3_f32_fabs0(float %a, float %b, float %c) {
 ; GFX942-LABEL: v_fmaximum3_f32_fabs0:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e64 v3, |v0|, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, |v0|, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_fabs0:
@@ -165,15 +187,20 @@ define float @v_fmaximum3_f32_fabs1(float %a, float %b, float %c) {
 ; GFX942-LABEL: v_fmaximum3_f32_fabs1:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e64 v3, v0, |v1|
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v1|
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], |v1|, |v1|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v0, |v1|, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e64 v1, |v1|, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_fabs1:
@@ -201,15 +228,19 @@ define float @v_fmaximum3_f32_fabs2(float %a, float %b, float %c) {
 ; GFX942-LABEL: v_fmaximum3_f32_fabs2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e32 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], |v2|, |v2|
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_max_f32_e64 v1, v0, |v2|
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v2|
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v0, |v2|, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v1, |v2|, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_fabs2:
@@ -237,15 +268,19 @@ define float @v_fmaximum3_f32_fabs_all(float %a, float %b, float %c) {
 ; GFX942-LABEL: v_fmaximum3_f32_fabs_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e64 v3, |v0|, |v1|
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, |v1|
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, |v1|, |v1|
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], |v2|, |v2|
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, |v0|, |v0|
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_max_f32_e64 v1, v0, |v2|
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v2|
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f32_e64 v0, |v0|, |v1|
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v0, |v2|, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v1, |v2|, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_fabs_all:
@@ -275,15 +310,19 @@ define float @v_fmaximum3_f32_fneg_all(float %a, float %b, float %c) {
 ; GFX942-LABEL: v_fmaximum3_f32_fneg_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e64 v3, -v0, -v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v1
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -v1, -v1
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], -v2, -v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -v0, -v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_max_f32_e64 v1, v0, -v2
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f32_e64 v0, -v0, -v1
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v0, -v2, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v1, -v2, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_fneg_all:
@@ -313,15 +352,19 @@ define float @v_fmaximum3_f32_fneg_fabs_all(float %a, float %b, float %c) {
 ; GFX942-LABEL: v_fmaximum3_f32_fneg_fabs_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e64 v3, -|v0|, -|v1|
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -|v0|, -|v1|
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -|v1|, -|v1|
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], -|v2|, -|v2|
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -|v0|, -|v0|
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_max_f32_e64 v1, v0, -|v2|
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, -|v2|
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f32_e64 v0, -|v0|, -|v1|
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v0, -|v2|, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v1, -|v2|, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_fneg_fabs_all:
@@ -354,15 +397,20 @@ define float @v_fmaximum3_f32_fneg0(float %a, float %b, float %c) {
 ; GFX942-LABEL: v_fmaximum3_f32_fneg0:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e64 v3, -v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_fneg0:
@@ -390,15 +438,20 @@ define float @v_fmaximum3_f32_fneg1(float %a, float %b, float %c) {
 ; GFX942-LABEL: v_fmaximum3_f32_fneg1:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e64 v3, v0, -v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v1
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], -v1, -v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v0, -v1, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e64 v1, -v1, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_fneg1:
@@ -426,15 +479,19 @@ define float @v_fmaximum3_f32_fneg2(float %a, float %b, float %c) {
 ; GFX942-LABEL: v_fmaximum3_f32_fneg2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e32 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], -v2, -v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_max_f32_e64 v1, v0, -v2
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v0, -v2, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v1, -v2, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_fneg2:
@@ -462,15 +519,18 @@ define float @v_fmaximum3_f32_const0(float %b, float %c) {
 ; GFX942-LABEL: v_fmaximum3_f32_const0:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e32 v2, 0x41000000, v0
-; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0x41000000
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX942-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_const0:
@@ -498,15 +558,18 @@ define float @v_fmaximum3_f32__const2(float %a, float %b) {
 ; GFX942-LABEL: v_fmaximum3_f32__const2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX942-NEXT:    v_max_f32_e32 v1, 0x41000000, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v1, 0x41000000
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32__const2:
@@ -534,15 +597,17 @@ define float @v_fmaximum3_f32_inlineimm0(float %b, float %c) {
 ; GFX942-LABEL: v_fmaximum3_f32_inlineimm0:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e32 v2, 4.0, v0
-; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX942-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, 4.0, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_inlineimm0:
@@ -569,15 +634,17 @@ define float @v_fmaximum3_f32__inlineimm(float %a, float %b) {
 ; GFX942-LABEL: v_fmaximum3_f32__inlineimm:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX942-NEXT:    v_max_f32_e32 v1, 4.0, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, 4.0, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32__inlineimm:
@@ -606,15 +673,16 @@ define float @v_fmaximum3_f32_const1_const2(float %a) {
 ; GFX942-LABEL: v_fmaximum3_f32_const1_const2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e32 v1, 0x41000000, v0
-; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v1, 0x41000000
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX942-NEXT:    v_max_f32_e32 v1, 0x41800000, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v1, 0x41800000
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_const1_const2:
@@ -644,23 +712,34 @@ define <2 x float> @v_fmaximum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float
 ; GFX942-LABEL: v_fmaximum3_v2f32:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e32 v6, v1, v3
-; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX942-NEXT:    v_max_f32_e32 v3, v0, v2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    v_max_f32_e32 v1, v1, v3
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX942-NEXT:    v_max_f32_e32 v2, v4, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v4, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX942-NEXT:    v_max_f32_e32 v2, v5, v1
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v5, v1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_max_f32_e32 v0, v2, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v5, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX942-NEXT:    v_max_f32_e32 v1, v2, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v2f32:
@@ -689,23 +768,34 @@ define <2 x float> @v_fmaximum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2
 ; GFX942-LABEL: v_fmaximum3_v2f32_commute:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e32 v6, v1, v3
-; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX942-NEXT:    v_max_f32_e32 v3, v0, v2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    v_max_f32_e32 v1, v1, v3
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX942-NEXT:    v_max_f32_e32 v2, v0, v4
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX942-NEXT:    v_max_f32_e32 v2, v1, v5
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v5, v1, vcc
+; GFX942-NEXT:    v_max_f32_e32 v1, v1, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v2f32_commute:
@@ -734,23 +824,32 @@ define <2 x float> @v_fmaximum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b,
 ; GFX942-LABEL: v_fmaximum3_v2f32__fabs_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e64 v6, |v1|, |v3|
-; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v1|, |v3|
-; GFX942-NEXT:    v_max_f32_e64 v3, |v0|, |v2|
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, |v3|, |v3|
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], |v4|, |v4|
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, |v2|
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX942-NEXT:    v_max_f32_e64 v2, v0, |v4|
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v4|
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX942-NEXT:    v_max_f32_e64 v2, v1, |v5|
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v1, |v5|
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, |v1|, |v1|
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, |v2|, |v2|
+; GFX942-NEXT:    v_max_f32_e64 v1, |v1|, |v3|
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, |v0|, |v0|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_max_f32_e64 v0, |v0|, |v2|
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v0, |v4|, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], |v5|, |v5|
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e64 v1, v1, |v5|, s[0:1]
+; GFX942-NEXT:    v_cndmask_b32_e64 v2, |v4|, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e64 v2, |v5|, v1, vcc
+; GFX942-NEXT:    v_max_f32_e32 v1, v1, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v2f32__fabs_all:
@@ -782,23 +881,32 @@ define <2 x float> @v_fmaximum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b,
 ; GFX942-LABEL: v_fmaximum3_v2f32__fneg_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e64 v6, -v1, -v3
-; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v1, -v3
-; GFX942-NEXT:    v_max_f32_e64 v3, -v0, -v2
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -v3, -v3
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], -v4, -v4
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -v1, -v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX942-NEXT:    v_max_f32_e64 v2, v0, -v4
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v4
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX942-NEXT:    v_max_f32_e64 v2, v1, -v5
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v1, -v5
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -v2, -v2
+; GFX942-NEXT:    v_max_f32_e64 v1, -v1, -v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -v0, -v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_max_f32_e64 v0, -v0, -v2
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v0, -v4, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], -v5, -v5
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e64 v1, v1, -v5, s[0:1]
+; GFX942-NEXT:    v_cndmask_b32_e64 v2, -v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e64 v2, -v5, v1, vcc
+; GFX942-NEXT:    v_max_f32_e32 v1, v1, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v2f32__fneg_all:
@@ -830,23 +938,28 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c
 ; GFX942-LABEL: v_fmaximum3_v2f32__inlineimm1:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e32 v4, 2.0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX942-NEXT:    v_max_f32_e32 v4, 2.0, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, 2.0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    v_max_f32_e32 v1, v1, v4
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, 2.0, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX942-NEXT:    v_max_f32_e32 v4, v0, v2
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX942-NEXT:    v_max_f32_e32 v2, v1, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v2
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX942-NEXT:    v_max_f32_e32 v1, v1, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v2f32__inlineimm1:
@@ -875,23 +988,28 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b
 ; GFX942-LABEL: v_fmaximum3_v2f32__inlineimm2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e32 v4, v1, v3
-; GFX942-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX942-NEXT:    v_max_f32_e32 v3, v0, v2
-; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX942-NEXT:    v_max_f32_e32 v2, 4.0, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    v_max_f32_e32 v1, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX942-NEXT:    v_max_f32_e32 v2, 4.0, v1
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, 4.0, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, 4.0, v1, vcc
+; GFX942-NEXT:    v_max_f32_e32 v1, v1, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v2f32__inlineimm2:
@@ -921,31 +1039,48 @@ define <3 x float> @v_fmaximum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float
 ; GFX942-LABEL: v_fmaximum3_v3f32:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e32 v9, v2, v5
-; GFX942-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX942-NEXT:    v_max_f32_e32 v5, v1, v4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v5, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v5
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX942-NEXT:    v_max_f32_e32 v4, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX942-NEXT:    v_max_f32_e32 v1, v1, v4
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v3
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
-; GFX942-NEXT:    v_max_f32_e32 v3, v6, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v6, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v6, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
-; GFX942-NEXT:    v_max_f32_e32 v3, v7, v1
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v7, v1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_max_f32_e32 v0, v3, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
-; GFX942-NEXT:    v_max_f32_e32 v3, v8, v2
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v8, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    v_max_f32_e32 v1, v3, v1
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v8, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v3f32:
@@ -976,31 +1111,48 @@ define <3 x float> @v_fmaximum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3
 ; GFX942-LABEL: v_fmaximum3_v3f32_commute:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e32 v9, v2, v5
-; GFX942-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX942-NEXT:    v_max_f32_e32 v5, v1, v4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v5, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v5
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX942-NEXT:    v_max_f32_e32 v4, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX942-NEXT:    v_max_f32_e32 v1, v1, v4
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v3
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
-; GFX942-NEXT:    v_max_f32_e32 v3, v0, v6
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v6
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
-; GFX942-NEXT:    v_max_f32_e32 v3, v1, v7
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v7
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v6, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
-; GFX942-NEXT:    v_max_f32_e32 v3, v2, v8
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v8
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
+; GFX942-NEXT:    v_max_f32_e32 v1, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v8, v2, vcc
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v3f32_commute:
@@ -1031,31 +1183,44 @@ define <3 x float> @v_fmaximum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b,
 ; GFX942-LABEL: v_fmaximum3_v3f32__fabs_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e64 v9, |v2|, |v5|
-; GFX942-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v2|, |v5|
-; GFX942-NEXT:    v_max_f32_e64 v5, |v1|, |v4|
-; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v1|, |v4|
-; GFX942-NEXT:    v_max_f32_e64 v4, |v0|, |v3|
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, |v5|, |v5|
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], |v6|, |v6|
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, |v3|
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
-; GFX942-NEXT:    v_max_f32_e64 v3, v0, |v6|
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v6|
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, |v2|, |v2|
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
-; GFX942-NEXT:    v_max_f32_e64 v3, v1, |v7|
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v1, |v7|
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
-; GFX942-NEXT:    v_max_f32_e64 v3, v2, |v8|
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v2, |v8|
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v5, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, |v4|, |v4|
+; GFX942-NEXT:    v_max_f32_e64 v2, |v2|, |v5|
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, |v1|, |v1|
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, |v3|, |v3|
+; GFX942-NEXT:    v_max_f32_e64 v1, |v1|, |v4|
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, |v0|, |v0|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX942-NEXT:    v_max_f32_e64 v0, |v0|, |v3|
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v0, |v6|, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], |v7|, |v7|
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e64 v1, v1, |v7|, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], |v8|, |v8|
+; GFX942-NEXT:    v_cndmask_b32_e64 v3, |v6|, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_cndmask_b32_e64 v2, v2, |v8|, s[0:1]
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e64 v3, |v7|, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    v_max_f32_e32 v1, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e64 v3, |v8|, v2, vcc
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v3f32__fabs_all:
@@ -1089,31 +1254,44 @@ define <3 x float> @v_fmaximum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b,
 ; GFX942-LABEL: v_fmaximum3_v3f32__fneg_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e64 v9, -v2, -v5
-; GFX942-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v2, -v5
-; GFX942-NEXT:    v_max_f32_e64 v5, -v1, -v4
-; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v1, -v4
-; GFX942-NEXT:    v_max_f32_e64 v4, -v0, -v3
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -v5, -v5
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], -v6, -v6
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -v2, -v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
-; GFX942-NEXT:    v_max_f32_e64 v3, v0, -v6
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v6
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
-; GFX942-NEXT:    v_max_f32_e64 v3, v1, -v7
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v1, -v7
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
-; GFX942-NEXT:    v_max_f32_e64 v3, v2, -v8
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v2, -v8
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v5, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -v4, -v4
+; GFX942-NEXT:    v_max_f32_e64 v2, -v2, -v5
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -v1, -v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -v3, -v3
+; GFX942-NEXT:    v_max_f32_e64 v1, -v1, -v4
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -v0, -v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX942-NEXT:    v_max_f32_e64 v0, -v0, -v3
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v0, -v6, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], -v7, -v7
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e64 v1, v1, -v7, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], -v8, -v8
+; GFX942-NEXT:    v_cndmask_b32_e64 v3, -v6, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_cndmask_b32_e64 v2, v2, -v8, s[0:1]
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e64 v3, -v7, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    v_max_f32_e32 v1, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e64 v3, -v8, v2, vcc
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v3f32__fneg_all:
@@ -1147,31 +1325,39 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c
 ; GFX942-LABEL: v_fmaximum3_v3f32__inlineimm1:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e32 v6, 2.0, v2
-; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v6, vcc
-; GFX942-NEXT:    v_max_f32_e32 v6, 2.0, v1
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, 2.0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v6
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, 2.0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    v_max_f32_e32 v1, v1, v6
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, 2.0, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v6
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX942-NEXT:    v_max_f32_e32 v6, 2.0, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX942-NEXT:    v_max_f32_e32 v6, v0, v3
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX942-NEXT:    v_max_f32_e32 v3, v1, v4
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v3
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX942-NEXT:    v_max_f32_e32 v3, v2, v5
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v1, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v3f32__inlineimm1:
@@ -1202,31 +1388,39 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b
 ; GFX942-LABEL: v_fmaximum3_v3f32__inlineimm2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e32 v6, v2, v5
-; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX942-NEXT:    v_max_f32_e32 v5, v1, v4
-; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v6, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX942-NEXT:    v_max_f32_e32 v4, v0, v3
-; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v5, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v4, vcc
-; GFX942-NEXT:    v_max_f32_e32 v3, 4.0, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX942-NEXT:    v_max_f32_e32 v3, 4.0, v1
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v5, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v5
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX942-NEXT:    v_max_f32_e32 v3, 4.0, v2
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX942-NEXT:    v_max_f32_e32 v1, v1, v4
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v3
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, 4.0, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, 4.0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    v_max_f32_e32 v1, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, 4.0, v2, vcc
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v3f32__inlineimm2:
@@ -1266,15 +1460,20 @@ define half @v_fmaximum3_f16(half %a, half %b, half %c) {
 ; GFX942-LABEL: v_fmaximum3_f16:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f16_e32 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_max_f16_e32 v1, v0, v2
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f16:
@@ -1311,15 +1510,20 @@ define half @v_fmaximum3_f16_commute(half %a, half %b, half %c) {
 ; GFX942-LABEL: v_fmaximum3_f16_commute:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f16_e32 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_max_f16_e32 v1, v2, v0
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v2, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v1, v0
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f16_commute:
@@ -1357,17 +1561,23 @@ define amdgpu_ps i32 @s_fmaximum3_f16(half inreg %a, half inreg %b, half inreg %
 ;
 ; GFX942-LABEL: s_fmaximum3_f16:
 ; GFX942:       ; %bb.0:
-; GFX942-NEXT:    v_mov_b32_e32 v0, s1
-; GFX942-NEXT:    v_max_f16_e32 v1, s0, v0
-; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, s0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v0, s0
+; GFX942-NEXT:    v_mov_b32_e32 v1, s1
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, s1, s1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v1, s2
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, s2, s2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX942-NEXT:    v_max_f16_e32 v1, s2, v0
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, s2, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX942-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_nop 0
 ; GFX942-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX942-NEXT:    ; return to shader part epilog
@@ -1414,15 +1624,21 @@ define half @v_fmaximum3_f16_fabs0(half %a, half %b, half %c) {
 ; GFX942-LABEL: v_fmaximum3_f16_fabs0:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f16_e64 v3, |v0|, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, v1
+; GFX942-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_max_f16_e32 v1, v0, v2
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f16_fabs0:
@@ -1461,15 +1677,21 @@ define half @v_fmaximum3_f16_fabs1(half %a, half %b, half %c) {
 ; GFX942-LABEL: v_fmaximum3_f16_fabs1:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f16_e64 v3, v0, |v1|
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v1|
+; GFX942-NEXT:    v_and_b32_e32 v3, 0x7fff, v1
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, |v1|, |v1|
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_max_f16_e32 v1, v0, v2
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f16_fabs1:
@@ -1508,15 +1730,21 @@ define half @v_fmaximum3_f16_fabs2(half %a, half %b, half %c) {
 ; GFX942-LABEL: v_fmaximum3_f16_fabs2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f16_e32 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    v_and_b32_e32 v3, 0x7fff, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_max_f16_e64 v1, v0, |v2|
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v2|
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, |v2|, |v2|
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f16_fabs2:
@@ -1555,15 +1783,23 @@ define half @v_fmaximum3_f16_fabs_all(half %a, half %b, half %c) {
 ; GFX942-LABEL: v_fmaximum3_f16_fabs_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f16_e64 v3, |v0|, |v1|
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v1|
+; GFX942-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX942-NEXT:    v_and_b32_e32 v3, 0x7fff, v1
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, |v1|, |v1|
+; GFX942-NEXT:    v_and_b32_e32 v4, 0x7fff, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, |v2|, |v2|
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_max_f16_e64 v1, v0, |v2|
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v2|
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f16_fabs_all:
@@ -1606,15 +1842,23 @@ define half @v_fmaximum3_f16_fneg_all(half %a, half %b, half %c) {
 ; GFX942-LABEL: v_fmaximum3_f16_fneg_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f16_e64 v3, -v0, -v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v1
+; GFX942-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
+; GFX942-NEXT:    v_xor_b32_e32 v3, 0x8000, v1
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, -v1, -v1
+; GFX942-NEXT:    v_xor_b32_e32 v4, 0x8000, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, -v2, -v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_max_f16_e64 v1, v0, -v2
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, -v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f16_fneg_all:
@@ -1657,15 +1901,23 @@ define half @v_fmaximum3_f16_fneg_fabs_all(half %a, half %b, half %c) {
 ; GFX942-LABEL: v_fmaximum3_f16_fneg_fabs_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f16_e64 v3, -|v0|, -|v1|
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -|v0|, -|v1|
+; GFX942-NEXT:    v_or_b32_e32 v0, 0x8000, v0
+; GFX942-NEXT:    v_or_b32_e32 v3, 0x8000, v1
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, -|v1|, -|v1|
+; GFX942-NEXT:    v_or_b32_e32 v4, 0x8000, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_max_f16_e64 v1, v0, -|v2|
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, -|v2|
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, -|v2|, -|v2|
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f16_fneg_fabs_all:
@@ -1711,15 +1963,21 @@ define half @v_fmaximum3_f16_fneg0(half %a, half %b, half %c) {
 ; GFX942-LABEL: v_fmaximum3_f16_fneg0:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f16_e64 v3, -v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -v0, v1
+; GFX942-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_max_f16_e32 v1, v0, v2
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f16_fneg0:
@@ -1758,15 +2016,21 @@ define half @v_fmaximum3_f16_fneg1(half %a, half %b, half %c) {
 ; GFX942-LABEL: v_fmaximum3_f16_fneg1:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f16_e64 v3, v0, -v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, -v1
+; GFX942-NEXT:    v_xor_b32_e32 v3, 0x8000, v1
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, -v1, -v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_max_f16_e32 v1, v0, v2
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f16_fneg1:
@@ -1805,15 +2069,21 @@ define half @v_fmaximum3_f16_fneg2(half %a, half %b, half %c) {
 ; GFX942-LABEL: v_fmaximum3_f16_fneg2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f16_e32 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    v_xor_b32_e32 v3, 0x8000, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_max_f16_e64 v1, v0, -v2
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, -v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, -v2, -v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f16_fneg2:
@@ -1852,15 +2122,18 @@ define half @v_fmaximum3_f16_const0(half %b, half %c) {
 ; GFX942-LABEL: v_fmaximum3_f16_const0:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f16_e32 v2, 0x4800, v0
-; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0x4800
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX942-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f16_const0:
@@ -1898,15 +2171,18 @@ define half @v_fmaximum3_f16__const2(half %a, half %b) {
 ; GFX942-LABEL: v_fmaximum3_f16__const2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX942-NEXT:    v_max_f16_e32 v1, 0x4800, v0
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v1, 0x4800
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f16__const2:
@@ -1944,15 +2220,18 @@ define half @v_fmaximum3_f16_inlineimm0(half %b, half %c) {
 ; GFX942-LABEL: v_fmaximum3_f16_inlineimm0:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f16_e32 v2, 4.0, v0
-; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0x4400
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX942-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f16_inlineimm0:
@@ -1989,15 +2268,18 @@ define half @v_fmaximum3_f16__inlineimm(half %a, half %b) {
 ; GFX942-LABEL: v_fmaximum3_f16__inlineimm:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX942-NEXT:    v_max_f16_e32 v1, 4.0, v0
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v1, 0x4400
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f16__inlineimm:
@@ -2038,15 +2320,16 @@ define half @v_fmaximum3_f16_const1_const2(half %a) {
 ; GFX942-LABEL: v_fmaximum3_f16_const1_const2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f16_e32 v1, 0x4800, v0
-; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v1, 0x4800
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX942-NEXT:    v_max_f16_e32 v1, 0x4c00, v0
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v1, 0x4c00
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f16_const1_const2:
@@ -2077,24 +2360,40 @@ define <2 x half> @v_fmaximum3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c
 ; GFX942-LABEL: v_fmaximum3_v2f16:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_max_f16 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v4, v0, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v4, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v1, v0, v5, s0
-; GFX942-NEXT:    v_pk_max_f16 v1, v2, v1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v2, v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v4, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v3, v1, s0
+; GFX942-NEXT:    v_perm_b32 v0, v4, v0, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v1
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v0, v1, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v2, v2, v0, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_perm_b32 v1, v2, v1, s0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v1, v0
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v2f16:
@@ -2123,24 +2422,39 @@ define <2 x half> @v_fmaximum3_v2f16_commute(<2 x half> %a, <2 x half> %b, <2 x
 ; GFX942-LABEL: v_fmaximum3_v2f16_commute:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_max_f16 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v4, v0, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v4, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v1, v0, v5, s0
-; GFX942-NEXT:    v_pk_max_f16 v1, v1, v2
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v5, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v4, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v3, v1, s0
+; GFX942-NEXT:    v_perm_b32 v0, v4, v0, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v2, v1, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v2, v3, s0
+; GFX942-NEXT:    v_perm_b32 v0, v0, v1, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v2f16_commute:
@@ -2172,25 +2486,42 @@ define <2 x half> @v_fmaximum3_v2f16__fabs_all(<2 x half> %a, <2 x half> %b, <2
 ; GFX942-LABEL: v_fmaximum3_v2f16__fabs_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v0
-; GFX942-NEXT:    v_and_b32_e32 v4, 0x7fff7fff, v1
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v1| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    v_pk_max_f16 v3, v3, v4
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cndmask_b32_sdwa v6, v4, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v1|
+; GFX942-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v1
+; GFX942-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX942-NEXT:    v_and_b32_e32 v5, 0x7fff7fff, v2
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_perm_b32 v1, v6, v0, s0
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v6, |v2| src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_pk_max_f16 v1, v1, v5
+; GFX942-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_sdwa v3, v4, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v2|
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v1, v3, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX942-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v1, v4, s0
+; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v2, v1, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v2, v3, s0
+; GFX942-NEXT:    v_perm_b32 v0, v0, v1, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v2f16__fabs_all:
@@ -2225,24 +2556,42 @@ define <2 x half> @v_fmaximum3_v2f16__fneg_all(<2 x half> %a, <2 x half> %b, <2
 ; GFX942-LABEL: v_fmaximum3_v2f16__fneg_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_max_f16 v3, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v1
+; GFX942-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
+; GFX942-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v1, v3, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v4, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v1, v0, v5, s0
-; GFX942-NEXT:    v_pk_max_f16 v1, v1, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v5, -v2
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v4, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v1, v4, s0
 ; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v2, v1, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v2, v3, s0
+; GFX942-NEXT:    v_perm_b32 v0, v0, v1, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v2f16__fneg_all:
@@ -2274,23 +2623,32 @@ define <2 x half> @v_fmaximum3_v2f16__inlineimm1(<2 x half> %a, <2 x half> %c) {
 ; GFX942-LABEL: v_fmaximum3_v2f16__inlineimm1:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    v_pk_max_f16 v2, v0, 2.0 op_sel_hi:[1,0]
-; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX942-NEXT:    v_cndmask_b32_sdwa v4, v3, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0x4000
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX942-NEXT:    v_perm_b32 v2, v4, v0, s0
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v1 src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_pk_max_f16 v2, v2, v1
-; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_sdwa v4, v3, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v2, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX942-NEXT:    v_perm_b32 v0, v4, v0, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v2, v3, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v1, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v0, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v1, v3, s0
+; GFX942-NEXT:    v_perm_b32 v0, v0, v2, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v2f16__inlineimm1:
@@ -2319,24 +2677,33 @@ define <2 x half> @v_fmaximum3_v2f16__inlineimm2(<2 x half> %a, <2 x half> %b) {
 ; GFX942-LABEL: v_fmaximum3_v2f16__inlineimm2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_max_f16 v2, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v3, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v3, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v1, v0, v4, s0
-; GFX942-NEXT:    v_pk_max_f16 v1, v1, 4.0 op_sel_hi:[1,0]
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v3, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v0, v0, v2, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v2, v1, s0
+; GFX942-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v1, 0x4400
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v1, v2, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v2f16__inlineimm2:
@@ -2367,37 +2734,74 @@ define <3 x half> @v_fmaximum3_v3f16(<3 x half> %a, <3 x half> %b, <3 x half> %c
 ; GFX942-LABEL: v_fmaximum3_v3f16:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_max_f16 v6, v0, v2
-; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v7, v1, v6, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX942-NEXT:    v_pk_max_f16 v6, v1, v3
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX942-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX942-NEXT:    v_pk_max_f16 v2, v4, v2
-; GFX942-NEXT:    v_cndmask_b32_e32 v9, v7, v6, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v6, v3, s0
+; GFX942-NEXT:    v_perm_b32 v1, v7, v1, s0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX942-NEXT:    v_perm_b32 v1, v1, v9, s0
-; GFX942-NEXT:    v_pk_max_f16 v1, v5, v1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v5, v9
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v4, v8
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v6, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v7, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v6, v2, s0
+; GFX942-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v0, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    v_perm_b32 v2, v4, v2, s0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    v_pk_max_f16 v0, v2, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v5, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v1, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v4, v5, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    v_perm_b32 v2, v4, v2, s0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v1, v3, s0
+; GFX942-NEXT:    v_pk_max_f16 v1, v2, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v3f16:
@@ -2429,37 +2833,71 @@ define <3 x half> @v_fmaximum3_v3f16_commute(<3 x half> %a, <3 x half> %b, <3 x
 ; GFX942-LABEL: v_fmaximum3_v3f16_commute:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_max_f16 v6, v0, v2
-; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX942-NEXT:    v_pk_max_f16 v6, v1, v3
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX942-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX942-NEXT:    v_pk_max_f16 v2, v2, v4
-; GFX942-NEXT:    v_cndmask_b32_e32 v9, v7, v6, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX942-NEXT:    v_perm_b32 v1, v1, v9, s0
-; GFX942-NEXT:    v_pk_max_f16 v1, v1, v5
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v9, v5
+; GFX942-NEXT:    v_cndmask_b32_sdwa v7, v1, v6, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v8, v4
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v7, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v6, v3, s0
+; GFX942-NEXT:    v_perm_b32 v1, v7, v1, s0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v6, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v6, v2, s0
+; GFX942-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX942-NEXT:    v_perm_b32 v0, v0, v2, s0
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v1, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v5
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_perm_b32 v1, v1, v2, s0
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v3f16_commute:
@@ -2498,42 +2936,75 @@ define <3 x half> @v_fmaximum3_v3f16__fabs_all(<3 x half> %a, <3 x half> %b, <3
 ; GFX942-LABEL: v_fmaximum3_v3f16__fabs_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_and_b32_e32 v7, 0x7fff7fff, v1
-; GFX942-NEXT:    v_and_b32_e32 v9, 0x7fff7fff, v3
-; GFX942-NEXT:    v_pk_max_f16 v7, v7, v9
-; GFX942-NEXT:    v_and_b32_e32 v6, 0x7fff7fff, v0
-; GFX942-NEXT:    v_and_b32_e32 v8, 0x7fff7fff, v2
-; GFX942-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
-; GFX942-NEXT:    v_mov_b32_e32 v12, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    v_pk_max_f16 v6, v6, v8
+; GFX942-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX942-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX942-NEXT:    v_cndmask_b32_e32 v9, v12, v9, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, |v1|, |v3|
-; GFX942-NEXT:    v_and_b32_e32 v11, 0x7fff7fff, v4
-; GFX942-NEXT:    v_and_b32_e32 v10, 0x7fff7fff, v5
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v12, v7, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v7, 16, v6
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v3, v9, v1, s0
-; GFX942-NEXT:    v_pk_max_f16 v3, v3, v10
-; GFX942-NEXT:    v_cndmask_b32_e32 v7, v12, v7, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v2|
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v12, v6, vcc
-; GFX942-NEXT:    v_perm_b32 v2, v7, v0, s0
-; GFX942-NEXT:    v_pk_max_f16 v2, v2, v11
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v7, |v4| src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v7, v3, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_and_b32_e32 v4, 0x7fff7fff, v4
+; GFX942-NEXT:    v_and_b32_e32 v5, 0x7fff7fff, v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v3, v7, s0
+; GFX942-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v2, v3, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v2, v6, s0
+; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v6, v12, v6, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v1, |v5|
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX942-NEXT:    v_perm_b32 v0, v0, v2, s0
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v1, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v5
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v12, v3, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v4|
+; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v12, v2, vcc
-; GFX942-NEXT:    v_perm_b32 v0, v6, v0, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_perm_b32 v1, v1, v2, s0
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v3f16__fabs_all:
@@ -2574,37 +3045,75 @@ define <3 x half> @v_fmaximum3_v3f16__fneg_all(<3 x half> %a, <3 x half> %b, <3
 ; GFX942-LABEL: v_fmaximum3_v3f16__fneg_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_max_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
-; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v2
+; GFX942-NEXT:    v_xor_b32_e32 v3, 0x80008000, v3
+; GFX942-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v7, v3, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_xor_b32_e32 v4, 0x80008000, v4
+; GFX942-NEXT:    v_xor_b32_e32 v5, 0x80008000, v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v3, v7, s0
+; GFX942-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v2, v3, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX942-NEXT:    v_pk_max_f16 v6, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -v1, -v3
-; GFX942-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX942-NEXT:    v_pk_max_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
-; GFX942-NEXT:    v_cndmask_b32_e32 v9, v7, v6, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX942-NEXT:    v_perm_b32 v1, v1, v9, s0
-; GFX942-NEXT:    v_pk_max_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v9, -v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v2, v6, s0
+; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v8, -v4
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v7, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX942-NEXT:    v_perm_b32 v0, v0, v2, s0
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v1, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v5
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_perm_b32 v1, v1, v2, s0
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v3f16__fneg_all:
@@ -2639,32 +3148,54 @@ define <3 x half> @v_fmaximum3_v3f16__inlineimm1(<3 x half> %a, <3 x half> %c) {
 ; GFX942-LABEL: v_fmaximum3_v3f16__inlineimm1:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    v_pk_max_f16 v4, v0, 2.0 op_sel_hi:[1,0]
-; GFX942-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX942-NEXT:    v_cndmask_b32_sdwa v6, v5, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX942-NEXT:    s_mov_b32 s1, 0x5040100
-; GFX942-NEXT:    v_pk_max_f16 v7, v1, 2.0
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX942-NEXT:    v_perm_b32 v4, v6, v0, s1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
-; GFX942-NEXT:    s_movk_i32 s0, 0x7e00
-; GFX942-NEXT:    v_pk_max_f16 v4, v4, v2
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
-; GFX942-NEXT:    v_pack_b32_f16 v7, v1, s0
-; GFX942-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v6, v2 src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_pk_max_f16 v7, v7, v3
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x4000
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX942-NEXT:    s_mov_b32 s1, 0xffff
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v4, v6, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    v_perm_b32 v5, v6, v5, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; GFX942-NEXT:    v_bfi_b32 v4, s1, v4, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v4
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v2, v4, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v2, v5, s0
+; GFX942-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v1, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX942-NEXT:    v_perm_b32 v0, v6, v0, s1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v3, v4, s0
+; GFX942-NEXT:    v_perm_b32 v1, v1, v2, s0
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v3f16__inlineimm1:
@@ -2696,37 +3227,55 @@ define <3 x half> @v_fmaximum3_v3f16__inlineimm2(<3 x half> %a, <3 x half> %b) {
 ; GFX942-LABEL: v_fmaximum3_v3f16__inlineimm2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_max_f16 v4, v0, v2
-; GFX942-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX942-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX942-NEXT:    v_perm_b32 v2, v0, v6, s0
-; GFX942-NEXT:    v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX942-NEXT:    v_cndmask_b32_e32 v7, v5, v4, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v5, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX942-NEXT:    v_perm_b32 v1, v1, v7, s0
-; GFX942-NEXT:    v_pk_max_f16 v1, v1, 4.0
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v7, v7
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v6, v6
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_perm_b32 v1, v5, v1, s0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v5, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v4, v2, s0
+; GFX942-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0x4400
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v2, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v2, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v1, vcc
+; GFX942-NEXT:    v_bfi_b32 v2, s0, v2, v1
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v3
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v3f16__inlineimm2:
@@ -2758,40 +3307,74 @@ define <4 x half> @v_fmaximum3_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c
 ; GFX942-LABEL: v_fmaximum3_v4f16:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_max_f16 v6, v0, v2
-; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v7, v1, v6, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX942-NEXT:    v_pk_max_f16 v6, v1, v3
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX942-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX942-NEXT:    v_pk_max_f16 v2, v4, v2
-; GFX942-NEXT:    v_cndmask_b32_e32 v9, v7, v6, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v7, v6, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v3, v1, v9, s0
-; GFX942-NEXT:    v_pk_max_f16 v3, v5, v3
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v5, v9
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v6, v7, v3, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v1 src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v6, v3, s0
+; GFX942-NEXT:    v_perm_b32 v1, v7, v1, s0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v7, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v4, v8
-; GFX942-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v6, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v6, v2, s0
+; GFX942-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v0, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v7, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    v_perm_b32 v2, v4, v2, s0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    v_pk_max_f16 v0, v2, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v5, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v1, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v4, v5, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    v_perm_b32 v2, v4, v2, s0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v1, v3, s0
+; GFX942-NEXT:    v_pk_max_f16 v1, v2, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v4f16:
@@ -2823,40 +3406,71 @@ define <4 x half> @v_fmaximum3_v4f16_commute(<4 x half> %a, <4 x half> %b, <4 x
 ; GFX942-LABEL: v_fmaximum3_v4f16_commute:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_max_f16 v6, v0, v2
-; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v7, v1, v6, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX942-NEXT:    v_pk_max_f16 v6, v1, v3
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX942-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX942-NEXT:    v_pk_max_f16 v2, v2, v4
-; GFX942-NEXT:    v_cndmask_b32_e32 v9, v7, v6, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v7, v6, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v3, v1, v9, s0
-; GFX942-NEXT:    v_pk_max_f16 v3, v3, v5
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v9, v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v6, v3, s0
+; GFX942-NEXT:    v_perm_b32 v1, v7, v1, s0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v6, v7, v3, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v7, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v8, v4
-; GFX942-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v6, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v6, v2, s0
+; GFX942-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v7, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX942-NEXT:    v_perm_b32 v0, v0, v2, s0
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v1, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v5
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_perm_b32 v1, v1, v2, s0
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v4f16_commute:
@@ -2895,43 +3509,75 @@ define <4 x half> @v_fmaximum3_v4f16__fabs_all(<4 x half> %a, <4 x half> %b, <4
 ; GFX942-LABEL: v_fmaximum3_v4f16__fabs_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_and_b32_e32 v7, 0x7fff7fff, v0
-; GFX942-NEXT:    v_and_b32_e32 v9, 0x7fff7fff, v2
-; GFX942-NEXT:    v_pk_max_f16 v7, v7, v9
-; GFX942-NEXT:    v_mov_b32_e32 v12, 0x7e00
-; GFX942-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    v_and_b32_e32 v6, 0x7fff7fff, v1
-; GFX942-NEXT:    v_and_b32_e32 v8, 0x7fff7fff, v3
-; GFX942-NEXT:    v_cndmask_b32_e32 v9, v12, v9, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v2|
-; GFX942-NEXT:    v_pk_max_f16 v6, v6, v8
+; GFX942-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX942-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v12, v7, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    v_and_b32_e32 v11, 0x7fff7fff, v5
-; GFX942-NEXT:    v_and_b32_e32 v10, 0x7fff7fff, v4
-; GFX942-NEXT:    v_cndmask_b32_sdwa v7, v12, v6, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, |v1|, |v3|
-; GFX942-NEXT:    v_perm_b32 v2, v9, v0, s0
-; GFX942-NEXT:    v_pk_max_f16 v2, v2, v10
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v12, v6, vcc
-; GFX942-NEXT:    v_perm_b32 v3, v7, v1, s0
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v7, |v5| src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_pk_max_f16 v3, v3, v11
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v7, v3, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_and_b32_e32 v4, 0x7fff7fff, v4
+; GFX942-NEXT:    v_and_b32_e32 v5, 0x7fff7fff, v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v3, v7, s0
+; GFX942-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_sdwa v6, v12, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v9, |v4| src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v2, v3, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v7, v12, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v1, |v5|
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v12, v3, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v4|
-; GFX942-NEXT:    v_perm_b32 v1, v6, v1, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v2, v6, s0
+; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v12, v2, vcc
-; GFX942-NEXT:    v_perm_b32 v0, v7, v0, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX942-NEXT:    v_perm_b32 v0, v0, v2, s0
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v1, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v5
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_perm_b32 v1, v1, v2, s0
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v4f16__fabs_all:
@@ -2972,40 +3618,75 @@ define <4 x half> @v_fmaximum3_v4f16__fneg_all(<4 x half> %a, <4 x half> %b, <4
 ; GFX942-LABEL: v_fmaximum3_v4f16__fneg_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_max_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
-; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v2
+; GFX942-NEXT:    v_xor_b32_e32 v3, 0x80008000, v3
+; GFX942-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v7, v3, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_xor_b32_e32 v4, 0x80008000, v4
+; GFX942-NEXT:    v_xor_b32_e32 v5, 0x80008000, v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v3, v7, s0
+; GFX942-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX942-NEXT:    v_pk_max_f16 v6, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -v1, -v3
-; GFX942-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX942-NEXT:    v_pk_max_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
-; GFX942-NEXT:    v_cndmask_b32_e32 v9, v7, v6, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v2, v3, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v7, v6, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v3, v1, v9, s0
-; GFX942-NEXT:    v_pk_max_f16 v3, v3, v5 neg_lo:[0,1] neg_hi:[0,1]
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v9, -v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v6, v7, v3, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, -v5 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v2, v6, s0
+; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v7, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v8, -v4
-; GFX942-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v7, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX942-NEXT:    v_perm_b32 v0, v0, v2, s0
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v1, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v5
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_perm_b32 v1, v1, v2, s0
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v4f16__fneg_all:
@@ -3040,37 +3721,57 @@ define <4 x half> @v_fmaximum3_v4f16__inlineimm1(<4 x half> %a, <4 x half> %c) {
 ; GFX942-LABEL: v_fmaximum3_v4f16__inlineimm1:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    v_pk_max_f16 v4, v0, 2.0 op_sel_hi:[1,0]
-; GFX942-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX942-NEXT:    v_cndmask_b32_sdwa v6, v5, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX942-NEXT:    v_pk_max_f16 v7, v1, 2.0 op_sel_hi:[1,0]
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x4000
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v4, v6, v0, s0
-; GFX942-NEXT:    v_pk_max_f16 v4, v4, v2
-; GFX942-NEXT:    v_cndmask_b32_sdwa v8, v5, v7, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
-; GFX942-NEXT:    v_perm_b32 v7, v8, v1, s0
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v8, v3 src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_pk_max_f16 v7, v7, v3
-; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_sdwa v8, v5, v7, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v6, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v4, v6, vcc
+; GFX942-NEXT:    v_perm_b32 v5, v6, v5, s0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v6, v5, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
+; GFX942-NEXT:    v_perm_b32 v4, v4, v5, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v4
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX942-NEXT:    v_perm_b32 v1, v8, v1, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX942-NEXT:    v_perm_b32 v0, v6, v0, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v2, v4, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v2, v5, s0
+; GFX942-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v1, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v3, v4, s0
+; GFX942-NEXT:    v_perm_b32 v1, v1, v2, s0
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v4f16__inlineimm1:
@@ -3102,40 +3803,58 @@ define <4 x half> @v_fmaximum3_v4f16__inlineimm2(<4 x half> %a, <4 x half> %b) {
 ; GFX942-LABEL: v_fmaximum3_v4f16__inlineimm2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_max_f16 v4, v0, v2
-; GFX942-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX942-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX942-NEXT:    v_perm_b32 v2, v0, v6, s0
-; GFX942-NEXT:    v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX942-NEXT:    v_cndmask_b32_e32 v7, v5, v4, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v5, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v3, v1, v7, s0
-; GFX942-NEXT:    v_pk_max_f16 v3, v3, 4.0 op_sel_hi:[1,0]
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v7, v7
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v5, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v6, v6
-; GFX942-NEXT:    v_perm_b32 v1, v1, v4, s0
-; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_sdwa v5, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v5, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_perm_b32 v1, v5, v1, s0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v4, v2, s0
+; GFX942-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0x4400
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v2, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v2, v4, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v2, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v2, v3, s0
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v4f16__inlineimm2:
@@ -3165,17 +3884,24 @@ define double @v_fmaximum3_f64(double %a, double %b, double %c) {
 ; GFX9-LABEL: v_fmaximum3_f64:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.maximum.f64(double %a, double %b)
   %max1 = call double @llvm.maximum.f64(double %max0, double %c)
@@ -3198,17 +3924,24 @@ define double @v_fmaximum3_f64_commute(double %a, double %b, double %c) {
 ; GFX9-LABEL: v_fmaximum3_f64_commute:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[4:5], v[0:1]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[0:1]
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[2:3], v[0:1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.maximum.f64(double %a, double %b)
   %max1 = call double @llvm.maximum.f64(double %c, double %max0)
@@ -3229,20 +3962,31 @@ define amdgpu_ps <2 x i32> @s_fmaximum3_f64(double inreg %a, double inreg %b, do
 ;
 ; GFX9-LABEL: s_fmaximum3_f64:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b64_e32 v[0:1], s[2:3]
-; GFX9-NEXT:    v_max_f64 v[2:3], s[0:1], v[0:1]
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], s[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, s[4:5], v[0:1]
+; GFX9-NEXT:    v_cmp_u_f64_e64 s[6:7], s[2:3], s[2:3]
+; GFX9-NEXT:    s_and_b64 s[6:7], s[6:7], exec
+; GFX9-NEXT:    s_cselect_b32 s1, s3, s1
+; GFX9-NEXT:    s_cselect_b32 s0, s2, s0
+; GFX9-NEXT:    v_cmp_u_f64_e64 s[6:7], s[0:1], s[0:1]
+; GFX9-NEXT:    s_and_b64 s[6:7], s[6:7], exec
+; GFX9-NEXT:    s_cselect_b32 s3, s1, s3
+; GFX9-NEXT:    s_cselect_b32 s2, s0, s2
+; GFX9-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9-NEXT:    v_max_f64 v[0:1], s[0:1], v[0:1]
+; GFX9-NEXT:    v_mov_b32_e32 v2, s5
+; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, s[4:5], s[4:5]
+; GFX9-NEXT:    v_mov_b32_e32 v4, s4
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v4, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc
-; GFX9-NEXT:    v_readfirstlane_b32 s1, v0
-; GFX9-NEXT:    v_readfirstlane_b32 s0, v1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v2, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX9-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX9-NEXT:    ; return to shader part epilog
   %max0 = call double @llvm.maximum.f64(double %a, double %b)
   %max1 = call double @llvm.maximum.f64(double %max0, double %c)
@@ -3272,17 +4016,25 @@ define double @v_fmaximum3_f64_fabs0(double %a, double %b, double %c) {
 ; GFX9-LABEL: v_fmaximum3_f64_fabs0:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f64 v[6:7], |v[0:1]|, v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, |v[0:1]|, v[2:3]
+; GFX9-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %a.fabs = call double @llvm.fabs.f64(double %a)
   %max0 = call double @llvm.maximum.f64(double %a.fabs, double %b)
@@ -3306,17 +4058,25 @@ define double @v_fmaximum3_f64_fabs1(double %a, double %b, double %c) {
 ; GFX9-LABEL: v_fmaximum3_f64_fabs1:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f64 v[6:7], v[0:1], |v[2:3]|
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], |v[2:3]|
+; GFX9-NEXT:    v_and_b32_e32 v6, 0x7fffffff, v3
+; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, |v[2:3]|, |v[2:3]|
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v6, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %b.fabs = call double @llvm.fabs.f64(double %b)
   %max0 = call double @llvm.maximum.f64(double %a, double %b.fabs)
@@ -3340,17 +4100,25 @@ define double @v_fmaximum3_f64_fabs2(double %a, double %b, double %c) {
 ; GFX9-LABEL: v_fmaximum3_f64_fabs2:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    v_and_b32_e32 v6, 0x7fffffff, v5
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], |v[4:5]|
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, |v[4:5]|, |v[4:5]|
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v6, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %c.fabs = call double @llvm.fabs.f64(double %c)
   %max0 = call double @llvm.maximum.f64(double %a, double %b)
@@ -3374,17 +4142,27 @@ define double @v_fmaximum3_f64_fabs_all(double %a, double %b, double %c) {
 ; GFX9-LABEL: v_fmaximum3_f64_fabs_all:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f64 v[6:7], |v[0:1]|, |v[2:3]|
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[2:3]|
+; GFX9-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
+; GFX9-NEXT:    v_and_b32_e32 v6, 0x7fffffff, v3
+; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, |v[2:3]|, |v[2:3]|
+; GFX9-NEXT:    v_and_b32_e32 v7, 0x7fffffff, v5
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], |v[4:5]|
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v6, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, |v[4:5]|, |v[4:5]|
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %a.fabs = call double @llvm.fabs.f64(double %a)
   %b.fabs = call double @llvm.fabs.f64(double %b)
@@ -3410,17 +4188,27 @@ define double @v_fmaximum3_f64_fneg_all(double %a, double %b, double %c) {
 ; GFX9-LABEL: v_fmaximum3_f64_fneg_all:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f64 v[6:7], -v[0:1], -v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -v[0:1], -v[2:3]
+; GFX9-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
+; GFX9-NEXT:    v_xor_b32_e32 v6, 0x80000000, v3
+; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -v[2:3], -v[2:3]
+; GFX9-NEXT:    v_xor_b32_e32 v7, 0x80000000, v5
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], -v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v6, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -v[4:5], -v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %a.fneg = fneg double %a
   %b.fneg = fneg double %b
@@ -3446,17 +4234,27 @@ define double @v_fmaximum3_f64_fneg_fabs_all(double %a, double %b, double %c) {
 ; GFX9-LABEL: v_fmaximum3_f64_fneg_fabs_all:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f64 v[6:7], -|v[0:1]|, -|v[2:3]|
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -|v[0:1]|, -|v[2:3]|
+; GFX9-NEXT:    v_or_b32_e32 v1, 0x80000000, v1
+; GFX9-NEXT:    v_or_b32_e32 v6, 0x80000000, v3
+; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -|v[2:3]|, -|v[2:3]|
+; GFX9-NEXT:    v_or_b32_e32 v7, 0x80000000, v5
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], -|v[4:5]|
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], -|v[4:5]|
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v6, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -|v[4:5]|, -|v[4:5]|
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %a.fabs = call double @llvm.fabs.f64(double %a)
   %b.fabs = call double @llvm.fabs.f64(double %b)
@@ -3485,17 +4283,25 @@ define double @v_fmaximum3_f64_fneg0(double %a, double %b, double %c) {
 ; GFX9-LABEL: v_fmaximum3_f64_fneg0:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f64 v[6:7], -v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -v[0:1], v[2:3]
+; GFX9-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %a.fneg = fneg double %a
   %max0 = call double @llvm.maximum.f64(double %a.fneg, double %b)
@@ -3519,17 +4325,25 @@ define double @v_fmaximum3_f64_fneg1(double %a, double %b, double %c) {
 ; GFX9-LABEL: v_fmaximum3_f64_fneg1:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f64 v[6:7], v[0:1], -v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], -v[2:3]
+; GFX9-NEXT:    v_xor_b32_e32 v6, 0x80000000, v3
+; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -v[2:3], -v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v6, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %b.fneg = fneg double %b
   %max0 = call double @llvm.maximum.f64(double %a, double %b.fneg)
@@ -3553,17 +4367,25 @@ define double @v_fmaximum3_f64_fneg2(double %a, double %b, double %c) {
 ; GFX9-LABEL: v_fmaximum3_f64_fneg2:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    v_xor_b32_e32 v6, 0x80000000, v5
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -v[4:5], -v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], -v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v6, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %c.fneg = fneg double %c
   %max0 = call double @llvm.maximum.f64(double %a, double %b)
@@ -3587,19 +4409,21 @@ define double @v_fmaximum3_f64_const0(double %b, double %c) {
 ; GFX9-LABEL: v_fmaximum3_f64_const0:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s0, 0
-; GFX9-NEXT:    s_mov_b32 s1, 0x40200000
-; GFX9-NEXT:    v_max_f64 v[4:5], v[0:1], s[0:1]
-; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
+; GFX9-NEXT:    v_mov_b32_e32 v4, 0x40200000
 ; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cndmask_b32_e32 v5, v4, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.maximum.f64(double 8.0, double %b)
   %max1 = call double @llvm.maximum.f64(double %max0, double %c)
@@ -3622,18 +4446,21 @@ define double @v_fmaximum3_f64__const2(double %a, double %b) {
 ; GFX9-LABEL: v_fmaximum3_f64__const2:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT:    s_mov_b32 s0, 0
-; GFX9-NEXT:    s_mov_b32 s1, 0x40200000
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], s[0:1]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0x40200000
 ; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v2, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.maximum.f64(double %a, double %b)
   %max1 = call double @llvm.maximum.f64(double %max0, double 8.0)
@@ -3656,17 +4483,21 @@ define double @v_fmaximum3_f64_inlineimm0(double %b, double %c) {
 ; GFX9-LABEL: v_fmaximum3_f64_inlineimm0:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f64 v[4:5], v[0:1], 4.0
-; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
+; GFX9-NEXT:    v_mov_b32_e32 v4, 0x40100000
 ; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cndmask_b32_e32 v5, v4, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.maximum.f64(double 4.0, double %b)
   %max1 = call double @llvm.maximum.f64(double %max0, double %c)
@@ -3689,17 +4520,21 @@ define double @v_fmaximum3_f64__inlineimm(double %a, double %b) {
 ; GFX9-LABEL: v_fmaximum3_f64__inlineimm:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], 4.0
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0x40100000
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v2, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.maximum.f64(double %a, double %b)
   %max1 = call double @llvm.maximum.f64(double %max0, double 4.0)
@@ -3722,20 +4557,18 @@ define double @v_fmaximum3_f64_const1_const2(double %a) {
 ; GFX9-LABEL: v_fmaximum3_f64_const1_const2:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s0, 0
-; GFX9-NEXT:    s_mov_b32 s1, 0x40200000
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], s[0:1]
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7ff80000
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0x40200000
 ; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
-; GFX9-NEXT:    s_mov_b32 s0, 0
-; GFX9-NEXT:    s_mov_b32 s1, 0x40300000
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], s[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v2, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0x40300000
 ; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v2, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.maximum.f64(double %a, double 8.0)
   %max1 = call double @llvm.maximum.f64(double %max0, double 16.0)
@@ -3758,15 +4591,20 @@ define <2 x float> @v_no_fmaximum3_f32__multi_use(float %a, float %b, float %c)
 ; GFX942-LABEL: v_no_fmaximum3_f32__multi_use:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f32_e32 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v1, vcc
+; GFX942-NEXT:    v_max_f32_e32 v1, v1, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_no_fmaximum3_f32__multi_use:
@@ -3792,17 +4630,24 @@ define amdgpu_ps <2 x i32> @s_no_fmaximum3_f32__multi_use(float inreg %a, float
 ;
 ; GFX942-LABEL: s_no_fmaximum3_f32__multi_use:
 ; GFX942:       ; %bb.0:
-; GFX942-NEXT:    v_mov_b32_e32 v0, s1
-; GFX942-NEXT:    v_max_f32_e32 v1, s0, v0
-; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v0, s0
+; GFX942-NEXT:    v_mov_b32_e32 v1, s1
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, s1, s1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX942-NEXT:    v_max_f32_e32 v1, s2, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, s2, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v1, s2
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, s2, s2
 ; GFX942-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX942-NEXT:    v_max_f32_e32 v1, v2, v1
 ; GFX942-NEXT:    s_nop 0
 ; GFX942-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX942-NEXT:    ; return to shader part epilog
@@ -3856,15 +4701,20 @@ define <2 x half> @v_no_fmaximum3_f16__multi_use(half %a, half %b, half %c) {
 ; GFX942-LABEL: v_no_fmaximum3_f16__multi_use:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_max_f16_e32 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_max_f16_e32 v1, v0, v2
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v1, vcc
+; GFX942-NEXT:    v_max_f16_e32 v1, v1, v2
 ; GFX942-NEXT:    v_pack_b32_f16 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -3896,19 +4746,25 @@ define amdgpu_ps <2 x i32> @s_no_fmaximum3_f16__multi_use(half inreg %a, half in
 ;
 ; GFX942-LABEL: s_no_fmaximum3_f16__multi_use:
 ; GFX942:       ; %bb.0:
-; GFX942-NEXT:    v_mov_b32_e32 v0, s1
-; GFX942-NEXT:    v_max_f16_e32 v1, s0, v0
-; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, s0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v0, s0
+; GFX942-NEXT:    v_mov_b32_e32 v1, s1
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, s1, s1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX942-NEXT:    v_max_f16_e32 v1, s2, v0
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, s2, v0
-; GFX942-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX942-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v1, s2
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, s2, s2
 ; GFX942-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX942-NEXT:    v_max_f16_e32 v1, v2, v1
+; GFX942-NEXT:    s_nop 0
 ; GFX942-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX942-NEXT:    ; return to shader part epilog
 ;
@@ -3952,24 +4808,39 @@ define <4 x half> @v_no_fmaximum3_v2f16__multi_use(<2 x half> %a, <2 x half> %b,
 ; GFX942-LABEL: v_no_fmaximum3_v2f16__multi_use:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_max_f16 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v4, v0, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v3, v1, s0
+; GFX942-NEXT:    v_perm_b32 v0, v4, v0, s0
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v4, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v0, v1, v5, s0
-; GFX942-NEXT:    v_pk_max_f16 v3, v0, v2
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v5, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v2, v1, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v4, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v1, v1, v5, s0
+; GFX942-NEXT:    v_cndmask_b32_sdwa v4, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    v_perm_b32 v1, v4, v1, s0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v2, v3, s0
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_no_fmaximum3_v2f16__multi_use:
@@ -4001,17 +4872,24 @@ define <2 x double> @v_no_fmaximum3_f64__multi_use(double %a, double %b, double
 ; GFX9-LABEL: v_no_fmaximum3_f64__multi_use:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v1, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v5, v5, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX9-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.maximum.f64(double %a, double %b)
   %max1 = call double @llvm.maximum.f64(double %max0, double %c)
diff --git a/llvm/test/CodeGen/AMDGPU/fminimum.ll b/llvm/test/CodeGen/AMDGPU/fminimum.ll
index 4cccc768d3c50..6f6ff465067b0 100644
--- a/llvm/test/CodeGen/AMDGPU/fminimum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminimum.ll
@@ -7,13 +7,22 @@
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-FAKE16 %s
 
 define amdgpu_ps float @test_fminimum_f32_vv(float %a, float %b) {
-; GFX9-LABEL: test_fminimum_f32_vv:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fminimum_f32_vv:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fminimum_f32_vv:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v2, v0, v1
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: test_fminimum_f32_vv:
 ; GFX12:       ; %bb.0:
@@ -24,14 +33,25 @@ define amdgpu_ps float @test_fminimum_f32_vv(float %a, float %b) {
 }
 
 define amdgpu_ps float @test_fminimum_f32_ss(float inreg %a, float inreg %b) {
-; GFX9-LABEL: test_fminimum_f32_ss:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b32_e32 v0, s1
-; GFX9-NEXT:    v_min_f32_e32 v1, s0, v0
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fminimum_f32_ss:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e64 vcc, s1, s1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fminimum_f32_ss:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s1
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, s0, v0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: test_fminimum_f32_ss:
 ; GFX12:       ; %bb.0:
@@ -44,13 +64,23 @@ define amdgpu_ps float @test_fminimum_f32_ss(float inreg %a, float inreg %b) {
 }
 
 define amdgpu_ps float @test_fminimum_f32_vs(float %a, float inreg %b) {
-; GFX9-LABEL: test_fminimum_f32_vs:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_min_f32_e32 v1, s0, v0
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fminimum_f32_vs:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s0
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e64 vcc, s0, s0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fminimum_f32_vs:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, s0, v0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: test_fminimum_f32_vs:
 ; GFX12:       ; %bb.0:
@@ -75,13 +105,22 @@ define amdgpu_ps float @test_fminimum_nnan_f32(float %a, float %b) {
 }
 
 define amdgpu_ps float @test_fminimum_nsz_f32(float %a, float %b) {
-; GFX9-LABEL: test_fminimum_nsz_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fminimum_nsz_f32:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fminimum_nsz_f32:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v2, v0, v1
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: test_fminimum_nsz_f32:
 ; GFX12:       ; %bb.0:
@@ -106,16 +145,30 @@ define amdgpu_ps float @test_fminimum_signed_zero_f32() {
 }
 
 define amdgpu_ps <2 x float> @test_fminimum_v2f32(<2 x float> %a, <2 x float> %b) {
-; GFX9-LABEL: test_fminimum_v2f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_min_f32_e32 v4, v0, v2
-; GFX9-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT:    v_min_f32_e32 v2, v1, v3
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fminimum_v2f32:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v0, v0, v2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v1, v1, v2
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fminimum_v2f32:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v0, v2
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v2, v1, v3
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: test_fminimum_v2f32:
 ; GFX12:       ; %bb.0:
@@ -127,18 +180,36 @@ define amdgpu_ps <2 x float> @test_fminimum_v2f32(<2 x float> %a, <2 x float> %b
 }
 
 define amdgpu_ps <2 x float> @test_fminimum_v2f32_ss(<2 x float> inreg %a, <2 x float> inreg %b) {
-; GFX9-LABEL: test_fminimum_v2f32_ss:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9-NEXT:    v_min_f32_e32 v1, s0, v0
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9-NEXT:    v_min_f32_e32 v3, s1, v1
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, s1, v1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fminimum_v2f32_ss:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s2
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e64 vcc, s2, s2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s3
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e64 vcc, s3, s3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v1, vcc
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v1, v1, v2
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fminimum_v2f32_ss:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, s0, v0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v3, s1, v1
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, s1, v1
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: test_fminimum_v2f32_ss:
 ; GFX12:       ; %bb.0:
@@ -152,19 +223,38 @@ define amdgpu_ps <2 x float> @test_fminimum_v2f32_ss(<2 x float> inreg %a, <2 x
 }
 
 define amdgpu_ps <3 x float> @test_fminimum_v3f32(<3 x float> %a, <3 x float> %b) {
-; GFX9-LABEL: test_fminimum_v3f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_min_f32_e32 v6, v0, v3
-; GFX9-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT:    v_min_f32_e32 v3, v1, v4
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX9-NEXT:    v_min_f32_e32 v3, v2, v5
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fminimum_v3f32:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v0, v0, v3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v2, v2, v3
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fminimum_v3f32:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v6, v0, v3
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v3, v1, v4
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v3, v2, v5
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: test_fminimum_v3f32:
 ; GFX12:       ; %bb.0:
@@ -177,22 +267,46 @@ define amdgpu_ps <3 x float> @test_fminimum_v3f32(<3 x float> %a, <3 x float> %b
 }
 
 define amdgpu_ps <4 x float> @test_fminimum_v4f32(<4 x float> %a, <4 x float> %b) {
-; GFX9-LABEL: test_fminimum_v4f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_min_f32_e32 v8, v0, v4
-; GFX9-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX9-NEXT:    v_min_f32_e32 v4, v1, v5
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX9-NEXT:    v_min_f32_e32 v4, v2, v6
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
-; GFX9-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX9-NEXT:    v_min_f32_e32 v4, v3, v7
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fminimum_v4f32:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v0, v0, v4
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v1, v1, v4
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v2, v2, v4
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v3, v3, v4
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fminimum_v4f32:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v8, v0, v4
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v1, v5
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v2, v6
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v3, v7
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: test_fminimum_v4f32:
 ; GFX12:       ; %bb.0:
@@ -206,58 +320,142 @@ define amdgpu_ps <4 x float> @test_fminimum_v4f32(<4 x float> %a, <4 x float> %b
 }
 
 define amdgpu_ps <16 x float> @test_fminimum_v16f32(<16 x float> %a, <16 x float> %b) {
-; GFX9-LABEL: test_fminimum_v16f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_min_f32_e32 v32, v1, v17
-; GFX9-NEXT:    v_mov_b32_e32 v33, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v1, v17
-; GFX9-NEXT:    v_min_f32_e32 v1, v0, v16
-; GFX9-NEXT:    v_cmp_o_f32_e64 s[12:13], v0, v16
-; GFX9-NEXT:    v_min_f32_e32 v17, v2, v18
-; GFX9-NEXT:    v_cmp_o_f32_e64 s[0:1], v2, v18
-; GFX9-NEXT:    v_min_f32_e32 v18, v3, v19
-; GFX9-NEXT:    v_cmp_o_f32_e64 s[2:3], v3, v19
-; GFX9-NEXT:    v_min_f32_e32 v19, v4, v20
-; GFX9-NEXT:    v_cmp_o_f32_e64 s[4:5], v4, v20
-; GFX9-NEXT:    v_min_f32_e32 v20, v5, v21
-; GFX9-NEXT:    v_cmp_o_f32_e64 s[6:7], v5, v21
-; GFX9-NEXT:    v_min_f32_e32 v21, v6, v22
-; GFX9-NEXT:    v_cmp_o_f32_e64 s[8:9], v6, v22
-; GFX9-NEXT:    v_min_f32_e32 v22, v7, v23
-; GFX9-NEXT:    v_cmp_o_f32_e64 s[10:11], v7, v23
-; GFX9-NEXT:    v_min_f32_e32 v23, v8, v24
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v33, v1, s[12:13]
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v33, v32, vcc
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v8, v24
-; GFX9-NEXT:    v_min_f32_e32 v34, v9, v25
-; GFX9-NEXT:    v_cndmask_b32_e32 v8, v33, v23, vcc
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v9, v25
-; GFX9-NEXT:    v_min_f32_e32 v35, v10, v26
-; GFX9-NEXT:    v_cndmask_b32_e32 v9, v33, v34, vcc
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v10, v26
-; GFX9-NEXT:    v_min_f32_e32 v36, v11, v27
-; GFX9-NEXT:    v_cndmask_b32_e32 v10, v33, v35, vcc
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v11, v27
-; GFX9-NEXT:    v_min_f32_e32 v37, v12, v28
-; GFX9-NEXT:    v_cndmask_b32_e32 v11, v33, v36, vcc
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v12, v28
-; GFX9-NEXT:    v_min_f32_e32 v16, v13, v29
-; GFX9-NEXT:    v_cndmask_b32_e32 v12, v33, v37, vcc
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v13, v29
-; GFX9-NEXT:    v_cndmask_b32_e32 v13, v33, v16, vcc
-; GFX9-NEXT:    v_min_f32_e32 v16, v14, v30
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v14, v30
-; GFX9-NEXT:    v_cndmask_b32_e32 v14, v33, v16, vcc
-; GFX9-NEXT:    v_min_f32_e32 v16, v15, v31
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v15, v31
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, v33, v17, s[0:1]
-; GFX9-NEXT:    v_cndmask_b32_e64 v3, v33, v18, s[2:3]
-; GFX9-NEXT:    v_cndmask_b32_e64 v4, v33, v19, s[4:5]
-; GFX9-NEXT:    v_cndmask_b32_e64 v5, v33, v20, s[6:7]
-; GFX9-NEXT:    v_cndmask_b32_e64 v6, v33, v21, s[8:9]
-; GFX9-NEXT:    v_cndmask_b32_e64 v7, v33, v22, s[10:11]
-; GFX9-NEXT:    v_cndmask_b32_e32 v15, v33, v16, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fminimum_v16f32:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v16, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v16, v0, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v17, v17
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v0, v0, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v17, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v18, v18
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v18, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v1, v1, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v18, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v19, v19
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v19, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v2, v2, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v19, v3, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v20, v20
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v4, v20, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v3, v3, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v20, v4, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v21, v21
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v5, v5, v21, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v4, v4, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v21, v5, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v22, v22
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v6, v6, v22, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v5, v5, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v22, v6, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v23, v23
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v7, v7, v23, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v6, v6, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v23, v7, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v24, v24
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v8, v8, v24, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v7, v7, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v24, v8, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v25, v25
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v9, v9, v25, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v8, v8, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v25, v9, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v10, v10, v26, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v9, v9, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v26, v10, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v27, v27
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v11, v11, v27, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v10, v10, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v27, v11, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v28, v28
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v12, v12, v28, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v11, v11, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v28, v12, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v29, v29
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v13, v13, v29, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v12, v12, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v29, v13, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v30, v30
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v14, v14, v30, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v13, v13, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v30, v14, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v31, v31
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v15, v15, v31, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v14, v14, v16
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v16, v31, v15, vcc
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v15, v15, v16
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fminimum_v16f32:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v32, v1, v17
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v33, 0x7fc00000
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v1, v17
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v0, v16
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e64 s[12:13], v0, v16
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v17, v2, v18
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e64 s[0:1], v2, v18
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v18, v3, v19
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e64 s[2:3], v3, v19
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v19, v4, v20
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e64 s[4:5], v4, v20
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v20, v5, v21
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e64 s[6:7], v5, v21
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v21, v6, v22
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e64 s[8:9], v6, v22
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v22, v7, v23
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e64 s[10:11], v7, v23
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v23, v8, v24
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v0, v33, v1, s[12:13]
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v1, v33, v32, vcc
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v8, v24
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v34, v9, v25
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v8, v33, v23, vcc
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v9, v25
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v35, v10, v26
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v9, v33, v34, vcc
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v10, v26
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v36, v11, v27
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v10, v33, v35, vcc
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v11, v27
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v37, v12, v28
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v11, v33, v36, vcc
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v12, v28
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v16, v13, v29
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v12, v33, v37, vcc
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v13, v29
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v13, v33, v16, vcc
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v16, v14, v30
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v14, v30
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v14, v33, v16, vcc
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v16, v15, v31
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v15, v31
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v2, v33, v17, s[0:1]
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v3, v33, v18, s[2:3]
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v4, v33, v19, s[4:5]
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v5, v33, v20, s[6:7]
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v6, v33, v21, s[8:9]
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v7, v33, v22, s[10:11]
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v15, v33, v16, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: test_fminimum_v16f32:
 ; GFX12:       ; %bb.0:
@@ -283,13 +481,22 @@ define amdgpu_ps <16 x float> @test_fminimum_v16f32(<16 x float> %a, <16 x float
 }
 
 define amdgpu_ps half @test_fminimum_f16_vv(half %a, half %b) {
-; GFX9-LABEL: test_fminimum_f16_vv:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fminimum_f16_vv:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-SDAG-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fminimum_f16_vv:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_min_f16_e32 v2, v0, v1
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7e00
+; GFX9-GISEL-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-SDAG-TRUE16-LABEL: test_fminimum_f16_vv:
 ; GFX12-SDAG-TRUE16:       ; %bb.0:
@@ -315,14 +522,25 @@ define amdgpu_ps half @test_fminimum_f16_vv(half %a, half %b) {
 }
 
 define amdgpu_ps half @test_fminimum_f16_ss(half inreg %a, half inreg %b) {
-; GFX9-LABEL: test_fminimum_f16_ss:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b32_e32 v0, s1
-; GFX9-NEXT:    v_min_f16_e32 v1, s0, v0
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, s0, v0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fminimum_f16_ss:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e64 vcc, s1, s1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-SDAG-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fminimum_f16_ss:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s1
+; GFX9-GISEL-NEXT:    v_min_f16_e32 v1, s0, v0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7e00
+; GFX9-GISEL-NEXT:    v_cmp_o_f16_e32 vcc, s0, v0
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: test_fminimum_f16_ss:
 ; GFX12:       ; %bb.0:
@@ -337,14 +555,19 @@ define amdgpu_ps half @test_fminimum_f16_ss(half inreg %a, half inreg %b) {
 define amdgpu_ps <2 x half> @test_fminimum_v2f16_vv(<2 x half> %a, <2 x half> %b) {
 ; GFX9-SDAG-LABEL: test_fminimum_v2f16_vv:
 ; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    v_pk_min_f16 v2, v0, v1
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-SDAG-NEXT:    v_cndmask_b32_sdwa v0, v3, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-SDAG-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_sdwa v3, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
 ; GFX9-SDAG-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-SDAG-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX9-SDAG-NEXT:    v_perm_b32 v1, v2, v1, s0
+; GFX9-SDAG-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX9-SDAG-NEXT:    v_pk_min_f16 v0, v0, v1
 ; GFX9-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-GISEL-LABEL: test_fminimum_v2f16_vv:
@@ -370,19 +593,25 @@ define amdgpu_ps <2 x half> @test_fminimum_v2f16_vv(<2 x half> %a, <2 x half> %b
 define amdgpu_ps <2 x half> @test_fminimum_v2f16_ss(<2 x half> inreg %a, <2 x half> inreg %b) {
 ; GFX9-SDAG-LABEL: test_fminimum_v2f16_ss:
 ; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s1
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
-; GFX9-SDAG-NEXT:    s_lshr_b32 s1, s1, 16
-; GFX9-SDAG-NEXT:    v_pk_min_f16 v1, s0, v1
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, s0, v0
-; GFX9-SDAG-NEXT:    s_lshr_b32 s0, s0, 16
+; GFX9-SDAG-NEXT:    s_lshr_b32 s2, s0, 16
+; GFX9-SDAG-NEXT:    s_lshr_b32 s3, s1, 16
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e64 vcc, s3, s3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, s1
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, s0, v3
-; GFX9-SDAG-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX9-SDAG-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e64 vcc, s1, s1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v2, vcc
+; GFX9-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX9-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX9-SDAG-NEXT:    v_lshl_or_b32 v1, v1, 16, v3
+; GFX9-SDAG-NEXT:    v_lshl_or_b32 v0, v0, 16, v2
+; GFX9-SDAG-NEXT:    v_pk_min_f16 v0, v0, v1
 ; GFX9-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-GISEL-LABEL: test_fminimum_v2f16_ss:
@@ -412,17 +641,32 @@ define amdgpu_ps <2 x half> @test_fminimum_v2f16_ss(<2 x half> inreg %a, <2 x ha
 define amdgpu_ps <3 x half> @test_fminimum_v3f16_vv(<3 x half> %a, <3 x half> %b) {
 ; GFX9-SDAG-LABEL: test_fminimum_v3f16_vv:
 ; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-SDAG-NEXT:    v_pk_min_f16 v3, v0, v2
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-SDAG-NEXT:    v_cndmask_b32_sdwa v0, v5, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-SDAG-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX9-SDAG-NEXT:    v_cndmask_b32_sdwa v5, v0, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
 ; GFX9-SDAG-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-SDAG-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX9-SDAG-NEXT:    v_perm_b32 v2, v4, v2, s0
+; GFX9-SDAG-NEXT:    v_perm_b32 v0, v5, v0, s0
+; GFX9-SDAG-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX9-SDAG-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX9-SDAG-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-SDAG-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX9-SDAG-NEXT:    v_perm_b32 v1, v2, v1, s0
+; GFX9-SDAG-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX9-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-GISEL-LABEL: test_fminimum_v3f16_vv:
@@ -464,24 +708,44 @@ define amdgpu_ps <3 x half> @test_fminimum_v3f16_vv(<3 x half> %a, <3 x half> %b
 define amdgpu_ps <3 x half> @test_fminimum_v3f16_ss(<3 x half> inreg %a, <3 x half> inreg %b) {
 ; GFX9-SDAG-LABEL: test_fminimum_v3f16_ss:
 ; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s3
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9-SDAG-NEXT:    v_pk_min_f16 v1, s1, v1
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, s1, v0
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-SDAG-NEXT:    s_lshr_b32 s4, s0, 16
+; GFX9-SDAG-NEXT:    s_lshr_b32 s5, s2, 16
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s4
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s5
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e64 vcc, s5, s5
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, s2
-; GFX9-SDAG-NEXT:    s_lshr_b32 s1, s2, 16
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-SDAG-NEXT:    v_pk_min_f16 v3, s0, v3
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, s0, v0
-; GFX9-SDAG-NEXT:    s_lshr_b32 s0, s0, 16
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v4, s1
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, s0, v4
-; GFX9-SDAG-NEXT:    v_cndmask_b32_sdwa v2, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX9-SDAG-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e64 vcc, s2, s2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v2, vcc
+; GFX9-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX9-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX9-SDAG-NEXT:    v_lshl_or_b32 v1, v1, 16, v3
+; GFX9-SDAG-NEXT:    v_lshl_or_b32 v0, v0, 16, v2
+; GFX9-SDAG-NEXT:    s_lshr_b32 s0, s1, 16
+; GFX9-SDAG-NEXT:    s_lshr_b32 s2, s3, 16
+; GFX9-SDAG-NEXT:    v_pk_min_f16 v0, v0, v1
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s0
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s2
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e64 vcc, s2, s2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v1, vcc
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, s1
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v4, s3
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e64 vcc, s3, s3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v4, v3, vcc
+; GFX9-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v4
+; GFX9-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX9-SDAG-NEXT:    v_lshl_or_b32 v2, v2, 16, v4
+; GFX9-SDAG-NEXT:    v_lshl_or_b32 v1, v1, 16, v3
+; GFX9-SDAG-NEXT:    v_pk_min_f16 v1, v1, v2
 ; GFX9-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-GISEL-LABEL: test_fminimum_v3f16_ss:
@@ -526,20 +790,32 @@ define amdgpu_ps <3 x half> @test_fminimum_v3f16_ss(<3 x half> inreg %a, <3 x ha
 define amdgpu_ps <4 x half> @test_fminimum_v4f16(<4 x half> %a, <4 x half> %b) {
 ; GFX9-SDAG-LABEL: test_fminimum_v4f16:
 ; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-SDAG-NEXT:    v_cndmask_b32_sdwa v1, v5, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-SDAG-NEXT:    v_pk_min_f16 v3, v0, v2
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-SDAG-NEXT:    v_cndmask_b32_sdwa v0, v5, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-SDAG-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX9-SDAG-NEXT:    v_cndmask_b32_sdwa v5, v0, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
 ; GFX9-SDAG-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-SDAG-NEXT:    v_perm_b32 v0, v0, v4, s0
-; GFX9-SDAG-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX9-SDAG-NEXT:    v_perm_b32 v2, v4, v2, s0
+; GFX9-SDAG-NEXT:    v_perm_b32 v0, v5, v0, s0
+; GFX9-SDAG-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX9-SDAG-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX9-SDAG-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-SDAG-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX9-SDAG-NEXT:    v_perm_b32 v1, v2, v1, s0
+; GFX9-SDAG-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX9-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-GISEL-LABEL: test_fminimum_v4f16:
@@ -574,32 +850,44 @@ define amdgpu_ps <4 x half> @test_fminimum_v4f16(<4 x half> %a, <4 x half> %b) {
 define amdgpu_ps <4 x half> @test_fminimum_v4f16_ss(<4 x half> inreg %a, <4 x half> inreg %b) {
 ; GFX9-SDAG-LABEL: test_fminimum_v4f16_ss:
 ; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s3
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9-SDAG-NEXT:    s_lshr_b32 s3, s3, 16
-; GFX9-SDAG-NEXT:    v_pk_min_f16 v1, s1, v1
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, s1, v0
-; GFX9-SDAG-NEXT:    s_lshr_b32 s1, s1, 16
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s3
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v2, v1, vcc
-; GFX9-SDAG-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, s1, v0
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v4, s2
-; GFX9-SDAG-NEXT:    s_lshr_b32 s1, s2, 16
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-SDAG-NEXT:    v_pk_min_f16 v4, s0, v4
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, s0, v0
-; GFX9-SDAG-NEXT:    s_lshr_b32 s0, s0, 16
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v5, s1
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
-; GFX9-SDAG-NEXT:    v_cmp_o_f16_e32 vcc, s0, v5
-; GFX9-SDAG-NEXT:    v_cndmask_b32_sdwa v2, v2, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX9-SDAG-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
-; GFX9-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v3
-; GFX9-SDAG-NEXT:    v_lshl_or_b32 v1, v1, 16, v2
+; GFX9-SDAG-NEXT:    s_lshr_b32 s4, s0, 16
+; GFX9-SDAG-NEXT:    s_lshr_b32 s5, s2, 16
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s4
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s5
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e64 vcc, s5, s5
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, s2
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e64 vcc, s2, s2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v2, vcc
+; GFX9-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX9-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX9-SDAG-NEXT:    v_lshl_or_b32 v1, v1, 16, v3
+; GFX9-SDAG-NEXT:    v_lshl_or_b32 v0, v0, 16, v2
+; GFX9-SDAG-NEXT:    s_lshr_b32 s0, s1, 16
+; GFX9-SDAG-NEXT:    s_lshr_b32 s2, s3, 16
+; GFX9-SDAG-NEXT:    v_pk_min_f16 v0, v0, v1
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s0
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s2
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e64 vcc, s2, s2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v1, vcc
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, s1
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v4, s3
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e64 vcc, s3, s3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, v4, v3, vcc
+; GFX9-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v4
+; GFX9-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX9-SDAG-NEXT:    v_lshl_or_b32 v2, v2, 16, v4
+; GFX9-SDAG-NEXT:    v_lshl_or_b32 v1, v1, 16, v3
+; GFX9-SDAG-NEXT:    v_pk_min_f16 v1, v1, v2
 ; GFX9-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-GISEL-LABEL: test_fminimum_v4f16_ss:
@@ -642,11 +930,13 @@ define amdgpu_ps <4 x half> @test_fminimum_v4f16_ss(<4 x half> inreg %a, <4 x ha
 define amdgpu_ps <2 x float> @test_fminimum_f64_vv(double %a, double %b) {
 ; GFX9-SDAG-LABEL: test_fminimum_f64_vv:
 ; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-SDAG-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-SDAG-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-GISEL-LABEL: test_fminimum_f64_vv:
@@ -670,13 +960,17 @@ define amdgpu_ps <2 x float> @test_fminimum_f64_vv(double %a, double %b) {
 define amdgpu_ps <2 x float> @test_fminimum_f64_ss(double inreg %a, double inreg %b) {
 ; GFX9-SDAG-LABEL: test_fminimum_f64_ss:
 ; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[4:5], s[2:3], s[2:3]
+; GFX9-SDAG-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s1, s3, s1
+; GFX9-SDAG-NEXT:    s_cselect_b32 s0, s2, s0
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[4:5], s[0:1], s[0:1]
+; GFX9-SDAG-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s3, s1, s3
+; GFX9-SDAG-NEXT:    s_cselect_b32 s2, s0, s2
 ; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9-SDAG-NEXT:    v_min_f64 v[2:3], s[0:1], v[0:1]
-; GFX9-SDAG-NEXT:    v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX9-SDAG-NEXT:    v_min_f64 v[0:1], s[0:1], v[0:1]
 ; GFX9-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-GISEL-LABEL: test_fminimum_f64_ss:
@@ -702,19 +996,28 @@ define amdgpu_ps <2 x float> @test_fminimum_f64_ss(double inreg %a, double inreg
 define amdgpu_ps <4 x float> @test_fminimum_v2f64_ss(<2 x double> inreg %a, <2 x double> inreg %b) {
 ; GFX9-SDAG-LABEL: test_fminimum_v2f64_ss:
 ; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s4
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s5
-; GFX9-SDAG-NEXT:    v_min_f64 v[2:3], s[0:1], v[0:1]
-; GFX9-SDAG-NEXT:    v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s6
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s7
-; GFX9-SDAG-NEXT:    v_min_f64 v[4:5], s[2:3], v[0:1]
-; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[0:1], s[2:3], v[0:1]
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[0:1]
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v3, v5, v6, s[0:1]
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[8:9], s[4:5], s[4:5]
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[10:11], s[6:7], s[6:7]
+; GFX9-SDAG-NEXT:    s_and_b64 s[8:9], s[8:9], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s1, s5, s1
+; GFX9-SDAG-NEXT:    s_cselect_b32 s0, s4, s0
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[8:9], s[0:1], s[0:1]
+; GFX9-SDAG-NEXT:    s_and_b64 s[8:9], s[8:9], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s8, s1, s5
+; GFX9-SDAG-NEXT:    s_cselect_b32 s9, s0, s4
+; GFX9-SDAG-NEXT:    s_and_b64 s[4:5], s[10:11], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s3, s7, s3
+; GFX9-SDAG-NEXT:    s_cselect_b32 s2, s6, s2
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[4:5], s[2:3], s[2:3]
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s9
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s8
+; GFX9-SDAG-NEXT:    v_min_f64 v[0:1], s[0:1], v[0:1]
+; GFX9-SDAG-NEXT:    s_and_b64 s[0:1], s[4:5], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s0, s3, s7
+; GFX9-SDAG-NEXT:    s_cselect_b32 s1, s2, s6
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s1
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, s0
+; GFX9-SDAG-NEXT:    v_min_f64 v[2:3], s[2:3], v[2:3]
 ; GFX9-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-GISEL-LABEL: test_fminimum_v2f64_ss:
@@ -747,23 +1050,34 @@ define amdgpu_ps <4 x float> @test_fminimum_v2f64_ss(<2 x double> inreg %a, <2 x
 define amdgpu_ps <8 x float> @test_fminimum_v4f64(<4 x double> %a, <4 x double> %b) {
 ; GFX9-SDAG-LABEL: test_fminimum_v4f64:
 ; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX9-SDAG-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-SDAG-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[0:1], v[2:3], v[10:11]
-; GFX9-SDAG-NEXT:    v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[2:3], v[4:5], v[12:13]
-; GFX9-SDAG-NEXT:    v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[4:5], v[6:7], v[14:15]
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[0:1]
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[0:1]
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[2:3]
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[2:3]
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[4:5]
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[4:5]
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[0:1], v[10:11], v[10:11]
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[2:3], v[12:13], v[12:13]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s[0:1]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s[0:1]
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[0:1], v[2:3], v[2:3]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s[2:3]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v4, v4, v12, s[2:3]
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[2:3], v[4:5], v[4:5]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX9-SDAG-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v9, v11, v3, s[0:1]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v8, v10, v2, s[0:1]
+; GFX9-SDAG-NEXT:    v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v9, v13, v5, s[2:3]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v8, v12, v4, s[2:3]
+; GFX9-SDAG-NEXT:    v_min_f64 v[4:5], v[4:5], v[8:9]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v9, v15, v7, vcc
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX9-SDAG-NEXT:    v_min_f64 v[6:7], v[6:7], v[8:9]
 ; GFX9-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-GISEL-LABEL: test_fminimum_v4f64:
@@ -802,31 +1116,50 @@ define amdgpu_ps <8 x float> @test_fminimum_v4f64(<4 x double> %a, <4 x double>
 define amdgpu_ps <8 x float> @test_fminimum_v4f64_ss(<4 x double> inreg %a, <4 x double> inreg %b) {
 ; GFX9-SDAG-LABEL: test_fminimum_v4f64_ss:
 ; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s8
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s9
-; GFX9-SDAG-NEXT:    v_min_f64 v[2:3], s[0:1], v[0:1]
-; GFX9-SDAG-NEXT:    v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v10, 0x7ff80000
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s10
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s11
-; GFX9-SDAG-NEXT:    v_min_f64 v[4:5], s[2:3], v[1:2]
-; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[0:1], s[2:3], v[1:2]
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s12
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s13
-; GFX9-SDAG-NEXT:    v_min_f64 v[6:7], s[4:5], v[1:2]
-; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[2:3], s[4:5], v[1:2]
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s14
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s15
-; GFX9-SDAG-NEXT:    v_min_f64 v[8:9], s[6:7], v[1:2]
-; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[4:5], s[6:7], v[1:2]
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[0:1]
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v3, v5, v10, s[0:1]
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v4, v6, 0, s[2:3]
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v5, v7, v10, s[2:3]
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v6, v8, 0, s[4:5]
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v7, v9, v10, s[4:5]
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[16:17], s[8:9], s[8:9]
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[18:19], s[10:11], s[10:11]
+; GFX9-SDAG-NEXT:    s_and_b64 s[16:17], s[16:17], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s1, s9, s1
+; GFX9-SDAG-NEXT:    s_cselect_b32 s0, s8, s0
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[16:17], s[0:1], s[0:1]
+; GFX9-SDAG-NEXT:    s_and_b64 s[16:17], s[16:17], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s20, s1, s9
+; GFX9-SDAG-NEXT:    s_cselect_b32 s21, s0, s8
+; GFX9-SDAG-NEXT:    s_and_b64 s[8:9], s[18:19], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s3, s11, s3
+; GFX9-SDAG-NEXT:    s_cselect_b32 s2, s10, s2
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[8:9], s[2:3], s[2:3]
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[16:17], s[12:13], s[12:13]
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s21
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s20
+; GFX9-SDAG-NEXT:    v_min_f64 v[0:1], s[0:1], v[0:1]
+; GFX9-SDAG-NEXT:    s_and_b64 s[8:9], s[8:9], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s18, s3, s11
+; GFX9-SDAG-NEXT:    s_cselect_b32 s19, s2, s10
+; GFX9-SDAG-NEXT:    s_and_b64 s[8:9], s[16:17], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s5, s13, s5
+; GFX9-SDAG-NEXT:    s_cselect_b32 s4, s12, s4
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[8:9], s[4:5], s[4:5]
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[10:11], s[14:15], s[14:15]
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s19
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, s18
+; GFX9-SDAG-NEXT:    v_min_f64 v[2:3], s[2:3], v[2:3]
+; GFX9-SDAG-NEXT:    s_and_b64 s[8:9], s[8:9], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s13, s5, s13
+; GFX9-SDAG-NEXT:    s_cselect_b32 s12, s4, s12
+; GFX9-SDAG-NEXT:    s_and_b64 s[8:9], s[10:11], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s7, s15, s7
+; GFX9-SDAG-NEXT:    s_cselect_b32 s6, s14, s6
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[8:9], s[6:7], s[6:7]
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v4, s12
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v5, s13
+; GFX9-SDAG-NEXT:    v_min_f64 v[4:5], s[4:5], v[4:5]
+; GFX9-SDAG-NEXT:    s_and_b64 s[0:1], s[8:9], exec
+; GFX9-SDAG-NEXT:    s_cselect_b32 s0, s7, s15
+; GFX9-SDAG-NEXT:    s_cselect_b32 s1, s6, s14
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v6, s1
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v7, s0
+; GFX9-SDAG-NEXT:    v_min_f64 v[6:7], s[6:7], v[6:7]
 ; GFX9-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-GISEL-LABEL: test_fminimum_v4f64_ss:
@@ -871,22 +1204,40 @@ define amdgpu_ps <8 x float> @test_fminimum_v4f64_ss(<4 x double> inreg %a, <4 x
 }
 
 define amdgpu_kernel void @fminimumi_f32_move_to_valu(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) {
-; GFX9-LABEL: fminimumi_f32_move_to_valu:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[2:3] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[6:7] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_min_f32_e32 v4, v1, v2
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v1, v2
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: fminimumi_f32_move_to_valu:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[2:3] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[6:7] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v1, vcc
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v1, v1, v2
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-SDAG-NEXT:    s_endpgm
+;
+; GFX9-GISEL-LABEL: fminimumi_f32_move_to_valu:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[2:3] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[6:7] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v1, v2
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-GISEL-NEXT:    s_endpgm
 ;
 ; GFX12-LABEL: fminimumi_f32_move_to_valu:
 ; GFX12:       ; %bb.0:
@@ -910,22 +1261,40 @@ define amdgpu_kernel void @fminimumi_f32_move_to_valu(ptr addrspace(1) %out, ptr
 }
 
 define amdgpu_kernel void @fminimum_f16_move_to_valu(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) {
-; GFX9-LABEL: fminimum_f16_move_to_valu:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_ushort v1, v0, s[2:3] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_ushort v2, v0, s[6:7] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_min_f16_e32 v4, v1, v2
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v2
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT:    global_store_short v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: fminimum_f16_move_to_valu:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_ushort v1, v0, s[2:3] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_ushort v2, v0, s[6:7] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v1, vcc
+; GFX9-SDAG-NEXT:    v_min_f16_e32 v1, v1, v2
+; GFX9-SDAG-NEXT:    global_store_short v0, v1, s[0:1]
+; GFX9-SDAG-NEXT:    s_endpgm
+;
+; GFX9-GISEL-LABEL: fminimum_f16_move_to_valu:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7e00
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_ushort v1, v0, s[2:3] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_ushort v2, v0, s[6:7] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_min_f16_e32 v4, v1, v2
+; GFX9-GISEL-NEXT:    v_cmp_o_f16_e32 vcc, v1, v2
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-GISEL-NEXT:    global_store_short v0, v1, s[0:1]
+; GFX9-GISEL-NEXT:    s_endpgm
 ;
 ; GFX12-SDAG-TRUE16-LABEL: fminimum_f16_move_to_valu:
 ; GFX12-SDAG-TRUE16:       ; %bb.0:
@@ -994,13 +1363,22 @@ define amdgpu_kernel void @fminimum_f16_move_to_valu(ptr addrspace(1) %out, ptr
 }
 
 define amdgpu_ps float @test_fminimum_f32_ieee_on(float %a, float %b) #0 {
-; GFX9-LABEL: test_fminimum_f32_ieee_on:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fminimum_f32_ieee_on:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fminimum_f32_ieee_on:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v2, v0, v1
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: test_fminimum_f32_ieee_on:
 ; GFX12:       ; %bb.0:
@@ -1011,13 +1389,22 @@ define amdgpu_ps float @test_fminimum_f32_ieee_on(float %a, float %b) #0 {
 }
 
 define amdgpu_ps float @test_fminimum_f32_ieee_off(float %a, float %b) #1 {
-; GFX9-LABEL: test_fminimum_f32_ieee_off:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX9-SDAG-LABEL: test_fminimum_f32_ieee_off:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-SDAG-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX9-GISEL-LABEL: test_fminimum_f32_ieee_off:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v2, v0, v1
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GFX9-GISEL-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: test_fminimum_f32_ieee_off:
 ; GFX12:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/fminimum3.ll b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
index 55fc7747bf81d..a345818de5711 100644
--- a/llvm/test/CodeGen/AMDGPU/fminimum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
@@ -18,15 +18,20 @@ define float @v_fminimum3_f32(float %a, float %b, float %c) {
 ; GFX942-LABEL: v_fminimum3_f32:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e32 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32:
@@ -53,15 +58,20 @@ define float @v_fminimum3_f32_commute(float %a, float %b, float %c) {
 ; GFX942-LABEL: v_fminimum3_f32_commute:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e32 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_min_f32_e32 v1, v2, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v1, v0
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_commute:
@@ -86,16 +96,23 @@ define amdgpu_ps i32 @s_fminimum3_f32(float inreg %a, float inreg %b, float inre
 ;
 ; GFX942-LABEL: s_fminimum3_f32:
 ; GFX942:       ; %bb.0:
-; GFX942-NEXT:    v_mov_b32_e32 v0, s1
-; GFX942-NEXT:    v_min_f32_e32 v1, s0, v0
-; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v0, s0
+; GFX942-NEXT:    v_mov_b32_e32 v1, s1
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, s1, s1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v1, s2
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, s2, s2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX942-NEXT:    v_min_f32_e32 v1, s2, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, s2, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_nop 0
 ; GFX942-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX942-NEXT:    ; return to shader part epilog
@@ -129,15 +146,20 @@ define float @v_fminimum3_f32_fabs0(float %a, float %b, float %c) {
 ; GFX942-LABEL: v_fminimum3_f32_fabs0:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e64 v3, |v0|, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, |v0|, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_fabs0:
@@ -165,15 +187,20 @@ define float @v_fminimum3_f32_fabs1(float %a, float %b, float %c) {
 ; GFX942-LABEL: v_fminimum3_f32_fabs1:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e64 v3, v0, |v1|
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v1|
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], |v1|, |v1|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v0, |v1|, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e64 v1, |v1|, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_fabs1:
@@ -201,15 +228,19 @@ define float @v_fminimum3_f32_fabs2(float %a, float %b, float %c) {
 ; GFX942-LABEL: v_fminimum3_f32_fabs2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e32 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], |v2|, |v2|
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_min_f32_e64 v1, v0, |v2|
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v2|
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v0, |v2|, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v1, |v2|, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_fabs2:
@@ -237,15 +268,19 @@ define float @v_fminimum3_f32_fabs_all(float %a, float %b, float %c) {
 ; GFX942-LABEL: v_fminimum3_f32_fabs_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e64 v3, |v0|, |v1|
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, |v1|
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, |v1|, |v1|
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], |v2|, |v2|
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, |v0|, |v0|
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_min_f32_e64 v1, v0, |v2|
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v2|
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f32_e64 v0, |v0|, |v1|
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v0, |v2|, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v1, |v2|, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_fabs_all:
@@ -275,15 +310,19 @@ define float @v_fminimum3_f32_fneg_all(float %a, float %b, float %c) {
 ; GFX942-LABEL: v_fminimum3_f32_fneg_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e64 v3, -v0, -v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v1
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -v1, -v1
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], -v2, -v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -v0, -v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_min_f32_e64 v1, v0, -v2
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f32_e64 v0, -v0, -v1
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v0, -v2, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v1, -v2, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_fneg_all:
@@ -313,15 +352,19 @@ define float @v_fminimum3_f32_fneg_fabs_all(float %a, float %b, float %c) {
 ; GFX942-LABEL: v_fminimum3_f32_fneg_fabs_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e64 v3, -|v0|, -|v1|
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -|v0|, -|v1|
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -|v1|, -|v1|
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], -|v2|, -|v2|
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -|v0|, -|v0|
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_min_f32_e64 v1, v0, -|v2|
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, -|v2|
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f32_e64 v0, -|v0|, -|v1|
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v0, -|v2|, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v1, -|v2|, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_fneg_fabs_all:
@@ -354,15 +397,20 @@ define float @v_fminimum3_f32_fneg0(float %a, float %b, float %c) {
 ; GFX942-LABEL: v_fminimum3_f32_fneg0:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e64 v3, -v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_fneg0:
@@ -390,15 +438,20 @@ define float @v_fminimum3_f32_fneg1(float %a, float %b, float %c) {
 ; GFX942-LABEL: v_fminimum3_f32_fneg1:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e64 v3, v0, -v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v1
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], -v1, -v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v0, -v1, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e64 v1, -v1, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_fneg1:
@@ -426,15 +479,19 @@ define float @v_fminimum3_f32_fneg2(float %a, float %b, float %c) {
 ; GFX942-LABEL: v_fminimum3_f32_fneg2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e32 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], -v2, -v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_min_f32_e64 v1, v0, -v2
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v0, -v2, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v1, -v2, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_fneg2:
@@ -462,15 +519,18 @@ define float @v_fminimum3_f32_const0(float %b, float %c) {
 ; GFX942-LABEL: v_fminimum3_f32_const0:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e32 v2, 0x41000000, v0
-; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0x41000000
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX942-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_const0:
@@ -498,15 +558,18 @@ define float @v_fminimum3_f32__const2(float %a, float %b) {
 ; GFX942-LABEL: v_fminimum3_f32__const2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX942-NEXT:    v_min_f32_e32 v1, 0x41000000, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v1, 0x41000000
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32__const2:
@@ -534,15 +597,17 @@ define float @v_fminimum3_f32_inlineimm0(float %b, float %c) {
 ; GFX942-LABEL: v_fminimum3_f32_inlineimm0:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e32 v2, 4.0, v0
-; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX942-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, 4.0, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_inlineimm0:
@@ -569,15 +634,17 @@ define float @v_fminimum3_f32__inlineimm(float %a, float %b) {
 ; GFX942-LABEL: v_fminimum3_f32__inlineimm:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX942-NEXT:    v_min_f32_e32 v1, 4.0, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, 4.0, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32__inlineimm:
@@ -606,15 +673,16 @@ define float @v_fminimum3_f32_const1_const2(float %a) {
 ; GFX942-LABEL: v_fminimum3_f32_const1_const2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e32 v1, 0x41000000, v0
-; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v1, 0x41000000
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX942-NEXT:    v_min_f32_e32 v1, 0x41800000, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v1, 0x41800000
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_const1_const2:
@@ -644,23 +712,34 @@ define <2 x float> @v_fminimum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float
 ; GFX942-LABEL: v_fminimum3_v2f32:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e32 v6, v1, v3
-; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX942-NEXT:    v_min_f32_e32 v3, v0, v2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    v_min_f32_e32 v1, v1, v3
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX942-NEXT:    v_min_f32_e32 v2, v4, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v4, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX942-NEXT:    v_min_f32_e32 v2, v5, v1
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v5, v1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_min_f32_e32 v0, v2, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v5, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX942-NEXT:    v_min_f32_e32 v1, v2, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v2f32:
@@ -689,23 +768,34 @@ define <2 x float> @v_fminimum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2
 ; GFX942-LABEL: v_fminimum3_v2f32_commute:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e32 v6, v1, v3
-; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX942-NEXT:    v_min_f32_e32 v3, v0, v2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    v_min_f32_e32 v1, v1, v3
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX942-NEXT:    v_min_f32_e32 v2, v0, v4
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX942-NEXT:    v_min_f32_e32 v2, v1, v5
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v5, v1, vcc
+; GFX942-NEXT:    v_min_f32_e32 v1, v1, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v2f32_commute:
@@ -734,23 +824,32 @@ define <2 x float> @v_fminimum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b,
 ; GFX942-LABEL: v_fminimum3_v2f32__fabs_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e64 v6, |v1|, |v3|
-; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v1|, |v3|
-; GFX942-NEXT:    v_min_f32_e64 v3, |v0|, |v2|
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, |v3|, |v3|
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], |v4|, |v4|
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, |v2|
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX942-NEXT:    v_min_f32_e64 v2, v0, |v4|
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v4|
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX942-NEXT:    v_min_f32_e64 v2, v1, |v5|
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v1, |v5|
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, |v1|, |v1|
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, |v2|, |v2|
+; GFX942-NEXT:    v_min_f32_e64 v1, |v1|, |v3|
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, |v0|, |v0|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_min_f32_e64 v0, |v0|, |v2|
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v0, |v4|, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], |v5|, |v5|
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e64 v1, v1, |v5|, s[0:1]
+; GFX942-NEXT:    v_cndmask_b32_e64 v2, |v4|, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e64 v2, |v5|, v1, vcc
+; GFX942-NEXT:    v_min_f32_e32 v1, v1, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v2f32__fabs_all:
@@ -782,23 +881,32 @@ define <2 x float> @v_fminimum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b,
 ; GFX942-LABEL: v_fminimum3_v2f32__fneg_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e64 v6, -v1, -v3
-; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v1, -v3
-; GFX942-NEXT:    v_min_f32_e64 v3, -v0, -v2
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -v3, -v3
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], -v4, -v4
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -v1, -v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX942-NEXT:    v_min_f32_e64 v2, v0, -v4
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v4
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX942-NEXT:    v_min_f32_e64 v2, v1, -v5
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v1, -v5
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -v2, -v2
+; GFX942-NEXT:    v_min_f32_e64 v1, -v1, -v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -v0, -v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_min_f32_e64 v0, -v0, -v2
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v0, -v4, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], -v5, -v5
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e64 v1, v1, -v5, s[0:1]
+; GFX942-NEXT:    v_cndmask_b32_e64 v2, -v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e64 v2, -v5, v1, vcc
+; GFX942-NEXT:    v_min_f32_e32 v1, v1, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v2f32__fneg_all:
@@ -830,23 +938,28 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c
 ; GFX942-LABEL: v_fminimum3_v2f32__inlineimm1:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e32 v4, 2.0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX942-NEXT:    v_min_f32_e32 v4, 2.0, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, 2.0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    v_min_f32_e32 v1, v1, v4
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, 2.0, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX942-NEXT:    v_min_f32_e32 v4, v0, v2
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX942-NEXT:    v_min_f32_e32 v2, v1, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v2
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX942-NEXT:    v_min_f32_e32 v1, v1, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v2f32__inlineimm1:
@@ -875,23 +988,28 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b
 ; GFX942-LABEL: v_fminimum3_v2f32__inlineimm2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e32 v4, v1, v3
-; GFX942-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX942-NEXT:    v_min_f32_e32 v3, v0, v2
-; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX942-NEXT:    v_min_f32_e32 v2, 4.0, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX942-NEXT:    v_min_f32_e32 v2, 4.0, v1
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, 4.0, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, 4.0, v1, vcc
+; GFX942-NEXT:    v_min_f32_e32 v1, v1, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v2f32__inlineimm2:
@@ -921,31 +1039,48 @@ define <3 x float> @v_fminimum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float
 ; GFX942-LABEL: v_fminimum3_v3f32:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e32 v9, v2, v5
-; GFX942-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX942-NEXT:    v_min_f32_e32 v5, v1, v4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v5, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v5
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX942-NEXT:    v_min_f32_e32 v4, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX942-NEXT:    v_min_f32_e32 v1, v1, v4
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v3
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
-; GFX942-NEXT:    v_min_f32_e32 v3, v6, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v6, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v6, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
-; GFX942-NEXT:    v_min_f32_e32 v3, v7, v1
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v7, v1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_min_f32_e32 v0, v3, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
-; GFX942-NEXT:    v_min_f32_e32 v3, v8, v2
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v8, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    v_min_f32_e32 v1, v3, v1
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v8, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX942-NEXT:    v_min_f32_e32 v2, v3, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v3f32:
@@ -976,31 +1111,48 @@ define <3 x float> @v_fminimum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3
 ; GFX942-LABEL: v_fminimum3_v3f32_commute:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e32 v9, v2, v5
-; GFX942-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX942-NEXT:    v_min_f32_e32 v5, v1, v4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v5, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v5
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX942-NEXT:    v_min_f32_e32 v4, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX942-NEXT:    v_min_f32_e32 v1, v1, v4
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v3
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
-; GFX942-NEXT:    v_min_f32_e32 v3, v0, v6
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v6
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
-; GFX942-NEXT:    v_min_f32_e32 v3, v1, v7
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v7
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v6, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
-; GFX942-NEXT:    v_min_f32_e32 v3, v2, v8
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v8
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
+; GFX942-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v8, v2, vcc
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v3f32_commute:
@@ -1031,31 +1183,44 @@ define <3 x float> @v_fminimum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b,
 ; GFX942-LABEL: v_fminimum3_v3f32__fabs_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e64 v9, |v2|, |v5|
-; GFX942-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v2|, |v5|
-; GFX942-NEXT:    v_min_f32_e64 v5, |v1|, |v4|
-; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v1|, |v4|
-; GFX942-NEXT:    v_min_f32_e64 v4, |v0|, |v3|
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, |v5|, |v5|
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], |v6|, |v6|
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, |v3|
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
-; GFX942-NEXT:    v_min_f32_e64 v3, v0, |v6|
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v6|
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, |v2|, |v2|
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
-; GFX942-NEXT:    v_min_f32_e64 v3, v1, |v7|
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v1, |v7|
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
-; GFX942-NEXT:    v_min_f32_e64 v3, v2, |v8|
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v2, |v8|
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v5, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, |v4|, |v4|
+; GFX942-NEXT:    v_min_f32_e64 v2, |v2|, |v5|
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, |v1|, |v1|
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, |v3|, |v3|
+; GFX942-NEXT:    v_min_f32_e64 v1, |v1|, |v4|
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, |v0|, |v0|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX942-NEXT:    v_min_f32_e64 v0, |v0|, |v3|
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v0, |v6|, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], |v7|, |v7|
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e64 v1, v1, |v7|, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], |v8|, |v8|
+; GFX942-NEXT:    v_cndmask_b32_e64 v3, |v6|, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_cndmask_b32_e64 v2, v2, |v8|, s[0:1]
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e64 v3, |v7|, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e64 v3, |v8|, v2, vcc
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v3f32__fabs_all:
@@ -1089,31 +1254,44 @@ define <3 x float> @v_fminimum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b,
 ; GFX942-LABEL: v_fminimum3_v3f32__fneg_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e64 v9, -v2, -v5
-; GFX942-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v2, -v5
-; GFX942-NEXT:    v_min_f32_e64 v5, -v1, -v4
-; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v1, -v4
-; GFX942-NEXT:    v_min_f32_e64 v4, -v0, -v3
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -v5, -v5
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], -v6, -v6
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -v2, -v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
-; GFX942-NEXT:    v_min_f32_e64 v3, v0, -v6
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v6
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
-; GFX942-NEXT:    v_min_f32_e64 v3, v1, -v7
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v1, -v7
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
-; GFX942-NEXT:    v_min_f32_e64 v3, v2, -v8
-; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v2, -v8
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v5, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -v4, -v4
+; GFX942-NEXT:    v_min_f32_e64 v2, -v2, -v5
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -v1, -v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -v3, -v3
+; GFX942-NEXT:    v_min_f32_e64 v1, -v1, -v4
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, -v0, -v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX942-NEXT:    v_min_f32_e64 v0, -v0, -v3
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v0, -v6, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], -v7, -v7
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e64 v1, v1, -v7, s[0:1]
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], -v8, -v8
+; GFX942-NEXT:    v_cndmask_b32_e64 v3, -v6, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_cndmask_b32_e64 v2, v2, -v8, s[0:1]
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e64 v3, -v7, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e64 v3, -v8, v2, vcc
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v3f32__fneg_all:
@@ -1147,31 +1325,39 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c
 ; GFX942-LABEL: v_fminimum3_v3f32__inlineimm1:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e32 v6, 2.0, v2
-; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v6, vcc
-; GFX942-NEXT:    v_min_f32_e32 v6, 2.0, v1
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, 2.0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v6
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, 2.0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    v_min_f32_e32 v1, v1, v6
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, 2.0, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v6
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX942-NEXT:    v_min_f32_e32 v6, 2.0, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX942-NEXT:    v_min_f32_e32 v6, v0, v3
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX942-NEXT:    v_min_f32_e32 v3, v1, v4
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v3
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX942-NEXT:    v_min_f32_e32 v3, v2, v5
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v3f32__inlineimm1:
@@ -1202,31 +1388,39 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b
 ; GFX942-LABEL: v_fminimum3_v3f32__inlineimm2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e32 v6, v2, v5
-; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX942-NEXT:    v_min_f32_e32 v5, v1, v4
-; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v6, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX942-NEXT:    v_min_f32_e32 v4, v0, v3
-; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v5, vcc
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v4, vcc
-; GFX942-NEXT:    v_min_f32_e32 v3, 4.0, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX942-NEXT:    v_min_f32_e32 v3, 4.0, v1
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v5, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v5
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX942-NEXT:    v_min_f32_e32 v3, 4.0, v2
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX942-NEXT:    v_min_f32_e32 v1, v1, v4
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v3
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, 4.0, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, 4.0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, 4.0, v2, vcc
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v3f32__inlineimm2:
@@ -1266,15 +1460,20 @@ define half @v_fminimum3_f16(half %a, half %b, half %c) {
 ; GFX942-LABEL: v_fminimum3_f16:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f16_e32 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_min_f16_e32 v1, v0, v2
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f16:
@@ -1311,15 +1510,20 @@ define half @v_fminimum3_f16_commute(half %a, half %b, half %c) {
 ; GFX942-LABEL: v_fminimum3_f16_commute:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f16_e32 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_min_f16_e32 v1, v2, v0
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v2, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v1, v0
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f16_commute:
@@ -1357,17 +1561,23 @@ define amdgpu_ps i32 @s_fminimum3_f16(half inreg %a, half inreg %b, half inreg %
 ;
 ; GFX942-LABEL: s_fminimum3_f16:
 ; GFX942:       ; %bb.0:
-; GFX942-NEXT:    v_mov_b32_e32 v0, s1
-; GFX942-NEXT:    v_min_f16_e32 v1, s0, v0
-; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, s0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v0, s0
+; GFX942-NEXT:    v_mov_b32_e32 v1, s1
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, s1, s1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v1, s2
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, s2, s2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX942-NEXT:    v_min_f16_e32 v1, s2, v0
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, s2, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX942-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_nop 0
 ; GFX942-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX942-NEXT:    ; return to shader part epilog
@@ -1414,15 +1624,21 @@ define half @v_fminimum3_f16_fabs0(half %a, half %b, half %c) {
 ; GFX942-LABEL: v_fminimum3_f16_fabs0:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f16_e64 v3, |v0|, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, v1
+; GFX942-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_min_f16_e32 v1, v0, v2
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f16_fabs0:
@@ -1461,15 +1677,21 @@ define half @v_fminimum3_f16_fabs1(half %a, half %b, half %c) {
 ; GFX942-LABEL: v_fminimum3_f16_fabs1:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f16_e64 v3, v0, |v1|
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v1|
+; GFX942-NEXT:    v_and_b32_e32 v3, 0x7fff, v1
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, |v1|, |v1|
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_min_f16_e32 v1, v0, v2
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f16_fabs1:
@@ -1508,15 +1730,21 @@ define half @v_fminimum3_f16_fabs2(half %a, half %b, half %c) {
 ; GFX942-LABEL: v_fminimum3_f16_fabs2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f16_e32 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    v_and_b32_e32 v3, 0x7fff, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_min_f16_e64 v1, v0, |v2|
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v2|
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, |v2|, |v2|
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f16_fabs2:
@@ -1555,15 +1783,23 @@ define half @v_fminimum3_f16_fabs_all(half %a, half %b, half %c) {
 ; GFX942-LABEL: v_fminimum3_f16_fabs_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f16_e64 v3, |v0|, |v1|
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v1|
+; GFX942-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX942-NEXT:    v_and_b32_e32 v3, 0x7fff, v1
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, |v1|, |v1|
+; GFX942-NEXT:    v_and_b32_e32 v4, 0x7fff, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, |v2|, |v2|
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_min_f16_e64 v1, v0, |v2|
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v2|
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f16_fabs_all:
@@ -1606,15 +1842,23 @@ define half @v_fminimum3_f16_fneg_all(half %a, half %b, half %c) {
 ; GFX942-LABEL: v_fminimum3_f16_fneg_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f16_e64 v3, -v0, -v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v1
+; GFX942-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
+; GFX942-NEXT:    v_xor_b32_e32 v3, 0x8000, v1
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, -v1, -v1
+; GFX942-NEXT:    v_xor_b32_e32 v4, 0x8000, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, -v2, -v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_min_f16_e64 v1, v0, -v2
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, -v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f16_fneg_all:
@@ -1657,15 +1901,23 @@ define half @v_fminimum3_f16_fneg_fabs_all(half %a, half %b, half %c) {
 ; GFX942-LABEL: v_fminimum3_f16_fneg_fabs_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f16_e64 v3, -|v0|, -|v1|
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -|v0|, -|v1|
+; GFX942-NEXT:    v_or_b32_e32 v0, 0x8000, v0
+; GFX942-NEXT:    v_or_b32_e32 v3, 0x8000, v1
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, -|v1|, -|v1|
+; GFX942-NEXT:    v_or_b32_e32 v4, 0x8000, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_min_f16_e64 v1, v0, -|v2|
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, -|v2|
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, -|v2|, -|v2|
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f16_fneg_fabs_all:
@@ -1711,15 +1963,21 @@ define half @v_fminimum3_f16_fneg0(half %a, half %b, half %c) {
 ; GFX942-LABEL: v_fminimum3_f16_fneg0:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f16_e64 v3, -v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -v0, v1
+; GFX942-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_min_f16_e32 v1, v0, v2
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f16_fneg0:
@@ -1758,15 +2016,21 @@ define half @v_fminimum3_f16_fneg1(half %a, half %b, half %c) {
 ; GFX942-LABEL: v_fminimum3_f16_fneg1:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f16_e64 v3, v0, -v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, -v1
+; GFX942-NEXT:    v_xor_b32_e32 v3, 0x8000, v1
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, -v1, -v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_min_f16_e32 v1, v0, v2
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f16_fneg1:
@@ -1805,15 +2069,21 @@ define half @v_fminimum3_f16_fneg2(half %a, half %b, half %c) {
 ; GFX942-LABEL: v_fminimum3_f16_fneg2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f16_e32 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    v_xor_b32_e32 v3, 0x8000, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_min_f16_e64 v1, v0, -v2
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, -v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, -v2, -v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f16_fneg2:
@@ -1852,15 +2122,18 @@ define half @v_fminimum3_f16_const0(half %b, half %c) {
 ; GFX942-LABEL: v_fminimum3_f16_const0:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f16_e32 v2, 0x4800, v0
-; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0x4800
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX942-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f16_const0:
@@ -1898,15 +2171,18 @@ define half @v_fminimum3_f16__const2(half %a, half %b) {
 ; GFX942-LABEL: v_fminimum3_f16__const2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX942-NEXT:    v_min_f16_e32 v1, 0x4800, v0
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v1, 0x4800
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f16__const2:
@@ -1944,15 +2220,18 @@ define half @v_fminimum3_f16_inlineimm0(half %b, half %c) {
 ; GFX942-LABEL: v_fminimum3_f16_inlineimm0:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f16_e32 v2, 4.0, v0
-; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0x4400
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX942-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f16_inlineimm0:
@@ -1989,15 +2268,18 @@ define half @v_fminimum3_f16__inlineimm(half %a, half %b) {
 ; GFX942-LABEL: v_fminimum3_f16__inlineimm:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX942-NEXT:    v_min_f16_e32 v1, 4.0, v0
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v1, 0x4400
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f16__inlineimm:
@@ -2038,15 +2320,16 @@ define half @v_fminimum3_f16_const1_const2(half %a) {
 ; GFX942-LABEL: v_fminimum3_f16_const1_const2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f16_e32 v1, 0x4800, v0
-; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v1, 0x4800
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX942-NEXT:    v_min_f16_e32 v1, 0x4c00, v0
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v1, 0x4c00
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f16_const1_const2:
@@ -2077,24 +2360,40 @@ define <2 x half> @v_fminimum3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c
 ; GFX942-LABEL: v_fminimum3_v2f16:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_min_f16 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v4, v0, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v4, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v1, v0, v5, s0
-; GFX942-NEXT:    v_pk_min_f16 v1, v2, v1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v2, v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v4, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v3, v1, s0
+; GFX942-NEXT:    v_perm_b32 v0, v4, v0, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v1
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v0, v1, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v2, v2, v0, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_perm_b32 v1, v2, v1, s0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v1, v0
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v2f16:
@@ -2123,24 +2422,39 @@ define <2 x half> @v_fminimum3_v2f16_commute(<2 x half> %a, <2 x half> %b, <2 x
 ; GFX942-LABEL: v_fminimum3_v2f16_commute:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_min_f16 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v4, v0, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v4, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v1, v0, v5, s0
-; GFX942-NEXT:    v_pk_min_f16 v1, v1, v2
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v5, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v4, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v3, v1, s0
+; GFX942-NEXT:    v_perm_b32 v0, v4, v0, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v2, v1, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v2, v3, s0
+; GFX942-NEXT:    v_perm_b32 v0, v0, v1, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v2f16_commute:
@@ -2172,25 +2486,42 @@ define <2 x half> @v_fminimum3_v2f16__fabs_all(<2 x half> %a, <2 x half> %b, <2
 ; GFX942-LABEL: v_fminimum3_v2f16__fabs_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v0
-; GFX942-NEXT:    v_and_b32_e32 v4, 0x7fff7fff, v1
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v1| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    v_pk_min_f16 v3, v3, v4
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cndmask_b32_sdwa v6, v4, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v1|
+; GFX942-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v1
+; GFX942-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX942-NEXT:    v_and_b32_e32 v5, 0x7fff7fff, v2
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_perm_b32 v1, v6, v0, s0
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v6, |v2| src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_pk_min_f16 v1, v1, v5
+; GFX942-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_sdwa v3, v4, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v2|
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v1, v3, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX942-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v1, v4, s0
+; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v2, v1, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v2, v3, s0
+; GFX942-NEXT:    v_perm_b32 v0, v0, v1, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v2f16__fabs_all:
@@ -2225,24 +2556,42 @@ define <2 x half> @v_fminimum3_v2f16__fneg_all(<2 x half> %a, <2 x half> %b, <2
 ; GFX942-LABEL: v_fminimum3_v2f16__fneg_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_min_f16 v3, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v1
+; GFX942-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
+; GFX942-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v1, v3, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v4, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v1, v0, v5, s0
-; GFX942-NEXT:    v_pk_min_f16 v1, v1, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v5, -v2
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v4, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v1, v4, s0
 ; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v2, v1, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v2, v3, s0
+; GFX942-NEXT:    v_perm_b32 v0, v0, v1, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v2f16__fneg_all:
@@ -2274,23 +2623,32 @@ define <2 x half> @v_fminimum3_v2f16__inlineimm1(<2 x half> %a, <2 x half> %c) {
 ; GFX942-LABEL: v_fminimum3_v2f16__inlineimm1:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    v_pk_min_f16 v2, v0, 2.0 op_sel_hi:[1,0]
-; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX942-NEXT:    v_cndmask_b32_sdwa v4, v3, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0x4000
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX942-NEXT:    v_perm_b32 v2, v4, v0, s0
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v1 src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_pk_min_f16 v2, v2, v1
-; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_sdwa v4, v3, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v2, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX942-NEXT:    v_perm_b32 v0, v4, v0, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v2, v3, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v1, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v0, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v1, v3, s0
+; GFX942-NEXT:    v_perm_b32 v0, v0, v2, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v2f16__inlineimm1:
@@ -2319,24 +2677,33 @@ define <2 x half> @v_fminimum3_v2f16__inlineimm2(<2 x half> %a, <2 x half> %b) {
 ; GFX942-LABEL: v_fminimum3_v2f16__inlineimm2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_min_f16 v2, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v3, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v3, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v1, v0, v4, s0
-; GFX942-NEXT:    v_pk_min_f16 v1, v1, 4.0 op_sel_hi:[1,0]
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v3, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v0, v0, v2, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v2, v1, s0
+; GFX942-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v1, 0x4400
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v1, v2, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v2f16__inlineimm2:
@@ -2367,37 +2734,74 @@ define <3 x half> @v_fminimum3_v3f16(<3 x half> %a, <3 x half> %b, <3 x half> %c
 ; GFX942-LABEL: v_fminimum3_v3f16:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_min_f16 v6, v0, v2
-; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v7, v1, v6, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX942-NEXT:    v_pk_min_f16 v6, v1, v3
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX942-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX942-NEXT:    v_pk_min_f16 v2, v4, v2
-; GFX942-NEXT:    v_cndmask_b32_e32 v9, v7, v6, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v6, v3, s0
+; GFX942-NEXT:    v_perm_b32 v1, v7, v1, s0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX942-NEXT:    v_perm_b32 v1, v1, v9, s0
-; GFX942-NEXT:    v_pk_min_f16 v1, v5, v1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v5, v9
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v4, v8
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v6, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v7, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v6, v2, s0
+; GFX942-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v0, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    v_perm_b32 v2, v4, v2, s0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    v_pk_min_f16 v0, v2, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v5, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v1, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v4, v5, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    v_perm_b32 v2, v4, v2, s0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v1, v3, s0
+; GFX942-NEXT:    v_pk_min_f16 v1, v2, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v3f16:
@@ -2429,37 +2833,71 @@ define <3 x half> @v_fminimum3_v3f16_commute(<3 x half> %a, <3 x half> %b, <3 x
 ; GFX942-LABEL: v_fminimum3_v3f16_commute:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_min_f16 v6, v0, v2
-; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX942-NEXT:    v_pk_min_f16 v6, v1, v3
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX942-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX942-NEXT:    v_pk_min_f16 v2, v2, v4
-; GFX942-NEXT:    v_cndmask_b32_e32 v9, v7, v6, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX942-NEXT:    v_perm_b32 v1, v1, v9, s0
-; GFX942-NEXT:    v_pk_min_f16 v1, v1, v5
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v9, v5
+; GFX942-NEXT:    v_cndmask_b32_sdwa v7, v1, v6, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v8, v4
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v7, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v6, v3, s0
+; GFX942-NEXT:    v_perm_b32 v1, v7, v1, s0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v6, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v6, v2, s0
+; GFX942-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX942-NEXT:    v_perm_b32 v0, v0, v2, s0
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v1, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v5
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_perm_b32 v1, v1, v2, s0
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v3f16_commute:
@@ -2498,42 +2936,75 @@ define <3 x half> @v_fminimum3_v3f16__fabs_all(<3 x half> %a, <3 x half> %b, <3
 ; GFX942-LABEL: v_fminimum3_v3f16__fabs_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_and_b32_e32 v7, 0x7fff7fff, v1
-; GFX942-NEXT:    v_and_b32_e32 v9, 0x7fff7fff, v3
-; GFX942-NEXT:    v_pk_min_f16 v7, v7, v9
-; GFX942-NEXT:    v_and_b32_e32 v6, 0x7fff7fff, v0
-; GFX942-NEXT:    v_and_b32_e32 v8, 0x7fff7fff, v2
-; GFX942-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
-; GFX942-NEXT:    v_mov_b32_e32 v12, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    v_pk_min_f16 v6, v6, v8
+; GFX942-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX942-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX942-NEXT:    v_cndmask_b32_e32 v9, v12, v9, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, |v1|, |v3|
-; GFX942-NEXT:    v_and_b32_e32 v11, 0x7fff7fff, v4
-; GFX942-NEXT:    v_and_b32_e32 v10, 0x7fff7fff, v5
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v12, v7, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v7, 16, v6
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v3, v9, v1, s0
-; GFX942-NEXT:    v_pk_min_f16 v3, v3, v10
-; GFX942-NEXT:    v_cndmask_b32_e32 v7, v12, v7, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v2|
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v12, v6, vcc
-; GFX942-NEXT:    v_perm_b32 v2, v7, v0, s0
-; GFX942-NEXT:    v_pk_min_f16 v2, v2, v11
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v7, |v4| src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v7, v3, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_and_b32_e32 v4, 0x7fff7fff, v4
+; GFX942-NEXT:    v_and_b32_e32 v5, 0x7fff7fff, v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v3, v7, s0
+; GFX942-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v2, v3, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v2, v6, s0
+; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v6, v12, v6, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v1, |v5|
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX942-NEXT:    v_perm_b32 v0, v0, v2, s0
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v1, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v5
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v12, v3, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v4|
+; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v12, v2, vcc
-; GFX942-NEXT:    v_perm_b32 v0, v6, v0, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_perm_b32 v1, v1, v2, s0
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v3f16__fabs_all:
@@ -2574,37 +3045,75 @@ define <3 x half> @v_fminimum3_v3f16__fneg_all(<3 x half> %a, <3 x half> %b, <3
 ; GFX942-LABEL: v_fminimum3_v3f16__fneg_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_min_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
-; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v2
+; GFX942-NEXT:    v_xor_b32_e32 v3, 0x80008000, v3
+; GFX942-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v7, v3, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_xor_b32_e32 v4, 0x80008000, v4
+; GFX942-NEXT:    v_xor_b32_e32 v5, 0x80008000, v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v3, v7, s0
+; GFX942-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v2, v3, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX942-NEXT:    v_pk_min_f16 v6, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -v1, -v3
-; GFX942-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX942-NEXT:    v_pk_min_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
-; GFX942-NEXT:    v_cndmask_b32_e32 v9, v7, v6, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX942-NEXT:    v_perm_b32 v1, v1, v9, s0
-; GFX942-NEXT:    v_pk_min_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v9, -v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v2, v6, s0
+; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v8, -v4
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v7, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX942-NEXT:    v_perm_b32 v0, v0, v2, s0
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v1, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v5
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_perm_b32 v1, v1, v2, s0
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v3f16__fneg_all:
@@ -2639,32 +3148,54 @@ define <3 x half> @v_fminimum3_v3f16__inlineimm1(<3 x half> %a, <3 x half> %c) {
 ; GFX942-LABEL: v_fminimum3_v3f16__inlineimm1:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    v_pk_min_f16 v4, v0, 2.0 op_sel_hi:[1,0]
-; GFX942-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX942-NEXT:    v_cndmask_b32_sdwa v6, v5, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX942-NEXT:    s_mov_b32 s1, 0x5040100
-; GFX942-NEXT:    v_pk_min_f16 v7, v1, 2.0
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX942-NEXT:    v_perm_b32 v4, v6, v0, s1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
-; GFX942-NEXT:    s_movk_i32 s0, 0x7e00
-; GFX942-NEXT:    v_pk_min_f16 v4, v4, v2
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
-; GFX942-NEXT:    v_pack_b32_f16 v7, v1, s0
-; GFX942-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v6, v2 src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_pk_min_f16 v7, v7, v3
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x4000
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX942-NEXT:    s_mov_b32 s1, 0xffff
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v4, v6, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    v_perm_b32 v5, v6, v5, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; GFX942-NEXT:    v_bfi_b32 v4, s1, v4, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v4
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v2, v4, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v2, v5, s0
+; GFX942-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v1, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX942-NEXT:    v_perm_b32 v0, v6, v0, s1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v3, v4, s0
+; GFX942-NEXT:    v_perm_b32 v1, v1, v2, s0
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v3f16__inlineimm1:
@@ -2696,37 +3227,55 @@ define <3 x half> @v_fminimum3_v3f16__inlineimm2(<3 x half> %a, <3 x half> %b) {
 ; GFX942-LABEL: v_fminimum3_v3f16__inlineimm2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_min_f16 v4, v0, v2
-; GFX942-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX942-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX942-NEXT:    v_perm_b32 v2, v0, v6, s0
-; GFX942-NEXT:    v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX942-NEXT:    v_cndmask_b32_e32 v7, v5, v4, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v5, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX942-NEXT:    v_perm_b32 v1, v1, v7, s0
-; GFX942-NEXT:    v_pk_min_f16 v1, v1, 4.0
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v7, v7
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v6, v6
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_perm_b32 v1, v5, v1, s0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v5, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v4, v2, s0
+; GFX942-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0x4400
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v2, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v2, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v1, vcc
+; GFX942-NEXT:    v_bfi_b32 v2, s0, v2, v1
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v3
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v3f16__inlineimm2:
@@ -2758,40 +3307,74 @@ define <4 x half> @v_fminimum3_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c
 ; GFX942-LABEL: v_fminimum3_v4f16:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_min_f16 v6, v0, v2
-; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v7, v1, v6, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX942-NEXT:    v_pk_min_f16 v6, v1, v3
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX942-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX942-NEXT:    v_pk_min_f16 v2, v4, v2
-; GFX942-NEXT:    v_cndmask_b32_e32 v9, v7, v6, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v7, v6, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v3, v1, v9, s0
-; GFX942-NEXT:    v_pk_min_f16 v3, v5, v3
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v5, v9
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v6, v7, v3, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v1 src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v6, v3, s0
+; GFX942-NEXT:    v_perm_b32 v1, v7, v1, s0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v7, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v4, v8
-; GFX942-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v6, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v6, v2, s0
+; GFX942-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v0, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v7, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    v_perm_b32 v2, v4, v2, s0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    v_pk_min_f16 v0, v2, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v5, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v1, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v4, v5, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    v_perm_b32 v2, v4, v2, s0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v1, v3, s0
+; GFX942-NEXT:    v_pk_min_f16 v1, v2, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v4f16:
@@ -2823,40 +3406,71 @@ define <4 x half> @v_fminimum3_v4f16_commute(<4 x half> %a, <4 x half> %b, <4 x
 ; GFX942-LABEL: v_fminimum3_v4f16_commute:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_min_f16 v6, v0, v2
-; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v7, v1, v6, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX942-NEXT:    v_pk_min_f16 v6, v1, v3
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX942-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX942-NEXT:    v_pk_min_f16 v2, v2, v4
-; GFX942-NEXT:    v_cndmask_b32_e32 v9, v7, v6, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v7, v6, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v3, v1, v9, s0
-; GFX942-NEXT:    v_pk_min_f16 v3, v3, v5
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v9, v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v6, v3, s0
+; GFX942-NEXT:    v_perm_b32 v1, v7, v1, s0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v6, v7, v3, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v7, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v8, v4
-; GFX942-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v6, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v6, v2, s0
+; GFX942-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v7, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX942-NEXT:    v_perm_b32 v0, v0, v2, s0
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v1, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v5
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_perm_b32 v1, v1, v2, s0
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v4f16_commute:
@@ -2895,43 +3509,75 @@ define <4 x half> @v_fminimum3_v4f16__fabs_all(<4 x half> %a, <4 x half> %b, <4
 ; GFX942-LABEL: v_fminimum3_v4f16__fabs_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_and_b32_e32 v7, 0x7fff7fff, v0
-; GFX942-NEXT:    v_and_b32_e32 v9, 0x7fff7fff, v2
-; GFX942-NEXT:    v_pk_min_f16 v7, v7, v9
-; GFX942-NEXT:    v_mov_b32_e32 v12, 0x7e00
-; GFX942-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    v_and_b32_e32 v6, 0x7fff7fff, v1
-; GFX942-NEXT:    v_and_b32_e32 v8, 0x7fff7fff, v3
-; GFX942-NEXT:    v_cndmask_b32_e32 v9, v12, v9, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v2|
-; GFX942-NEXT:    v_pk_min_f16 v6, v6, v8
+; GFX942-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX942-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v12, v7, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    v_and_b32_e32 v11, 0x7fff7fff, v5
-; GFX942-NEXT:    v_and_b32_e32 v10, 0x7fff7fff, v4
-; GFX942-NEXT:    v_cndmask_b32_sdwa v7, v12, v6, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, |v1|, |v3|
-; GFX942-NEXT:    v_perm_b32 v2, v9, v0, s0
-; GFX942-NEXT:    v_pk_min_f16 v2, v2, v10
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v12, v6, vcc
-; GFX942-NEXT:    v_perm_b32 v3, v7, v1, s0
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v7, |v5| src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_pk_min_f16 v3, v3, v11
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v7, v3, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_and_b32_e32 v4, 0x7fff7fff, v4
+; GFX942-NEXT:    v_and_b32_e32 v5, 0x7fff7fff, v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v3, v7, s0
+; GFX942-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_sdwa v6, v12, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v9, |v4| src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v2, v3, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v7, v12, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v1, |v5|
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v12, v3, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v4|
-; GFX942-NEXT:    v_perm_b32 v1, v6, v1, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v2, v6, s0
+; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v12, v2, vcc
-; GFX942-NEXT:    v_perm_b32 v0, v7, v0, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX942-NEXT:    v_perm_b32 v0, v0, v2, s0
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v1, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v5
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_perm_b32 v1, v1, v2, s0
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v4f16__fabs_all:
@@ -2972,40 +3618,75 @@ define <4 x half> @v_fminimum3_v4f16__fneg_all(<4 x half> %a, <4 x half> %b, <4
 ; GFX942-LABEL: v_fminimum3_v4f16__fneg_all:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_min_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
-; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v2
+; GFX942-NEXT:    v_xor_b32_e32 v3, 0x80008000, v3
+; GFX942-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v7, v3, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_xor_b32_e32 v4, 0x80008000, v4
+; GFX942-NEXT:    v_xor_b32_e32 v5, 0x80008000, v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v3, v7, s0
+; GFX942-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX942-NEXT:    v_pk_min_f16 v6, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -v1, -v3
-; GFX942-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX942-NEXT:    v_pk_min_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
-; GFX942-NEXT:    v_cndmask_b32_e32 v9, v7, v6, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v2, v3, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v7, v6, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v3, v1, v9, s0
-; GFX942-NEXT:    v_pk_min_f16 v3, v3, v5 neg_lo:[0,1] neg_hi:[0,1]
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v9, -v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v6, v7, v3, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, -v5 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v2, v6, s0
+; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v7, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v8, -v4
-; GFX942-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v7, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX942-NEXT:    v_perm_b32 v0, v0, v2, s0
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v1, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v5
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_perm_b32 v1, v1, v2, s0
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v4f16__fneg_all:
@@ -3040,37 +3721,57 @@ define <4 x half> @v_fminimum3_v4f16__inlineimm1(<4 x half> %a, <4 x half> %c) {
 ; GFX942-LABEL: v_fminimum3_v4f16__inlineimm1:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    v_pk_min_f16 v4, v0, 2.0 op_sel_hi:[1,0]
-; GFX942-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX942-NEXT:    v_cndmask_b32_sdwa v6, v5, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX942-NEXT:    v_pk_min_f16 v7, v1, 2.0 op_sel_hi:[1,0]
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x4000
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v4, v6, v0, s0
-; GFX942-NEXT:    v_pk_min_f16 v4, v4, v2
-; GFX942-NEXT:    v_cndmask_b32_sdwa v8, v5, v7, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
-; GFX942-NEXT:    v_perm_b32 v7, v8, v1, s0
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v8, v3 src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_pk_min_f16 v7, v7, v3
-; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_sdwa v8, v5, v7, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v6, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v4, v6, vcc
+; GFX942-NEXT:    v_perm_b32 v5, v6, v5, s0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v6, v5, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
+; GFX942-NEXT:    v_perm_b32 v4, v4, v5, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v4
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX942-NEXT:    v_perm_b32 v1, v8, v1, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX942-NEXT:    v_perm_b32 v0, v6, v0, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v2, v4, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v2, v5, s0
+; GFX942-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v1, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v3, v4, s0
+; GFX942-NEXT:    v_perm_b32 v1, v1, v2, s0
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v4f16__inlineimm1:
@@ -3102,40 +3803,58 @@ define <4 x half> @v_fminimum3_v4f16__inlineimm2(<4 x half> %a, <4 x half> %b) {
 ; GFX942-LABEL: v_fminimum3_v4f16__inlineimm2:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_min_f16 v4, v0, v2
-; GFX942-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX942-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX942-NEXT:    v_perm_b32 v2, v0, v6, s0
-; GFX942-NEXT:    v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX942-NEXT:    v_cndmask_b32_e32 v7, v5, v4, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v5, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v3, v1, v7, s0
-; GFX942-NEXT:    v_pk_min_f16 v3, v3, 4.0 op_sel_hi:[1,0]
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v7, v7
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
-; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v5, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v6, v6
-; GFX942-NEXT:    v_perm_b32 v1, v1, v4, s0
-; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_cndmask_b32_sdwa v5, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v5, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_perm_b32 v1, v5, v1, s0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v4, v3, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v4, v2, s0
+; GFX942-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0x4400
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v2, v0, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v2, v4, vcc
+; GFX942-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v3
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v2, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v2, v3, s0
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v4f16__inlineimm2:
@@ -3165,17 +3884,24 @@ define double @v_fminimum3_f64(double %a, double %b, double %c) {
 ; GFX9-LABEL: v_fminimum3_f64:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.minimum.f64(double %a, double %b)
   %max1 = call double @llvm.minimum.f64(double %max0, double %c)
@@ -3198,17 +3924,24 @@ define double @v_fminimum3_f64_commute(double %a, double %b, double %c) {
 ; GFX9-LABEL: v_fminimum3_f64_commute:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[4:5], v[0:1]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[0:1]
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[2:3], v[0:1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.minimum.f64(double %a, double %b)
   %max1 = call double @llvm.minimum.f64(double %c, double %max0)
@@ -3229,20 +3962,31 @@ define amdgpu_ps <2 x i32> @s_fminimum3_f64(double inreg %a, double inreg %b, do
 ;
 ; GFX9-LABEL: s_fminimum3_f64:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b64_e32 v[0:1], s[2:3]
-; GFX9-NEXT:    v_min_f64 v[2:3], s[0:1], v[0:1]
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], s[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, s[4:5], v[0:1]
+; GFX9-NEXT:    v_cmp_u_f64_e64 s[6:7], s[2:3], s[2:3]
+; GFX9-NEXT:    s_and_b64 s[6:7], s[6:7], exec
+; GFX9-NEXT:    s_cselect_b32 s1, s3, s1
+; GFX9-NEXT:    s_cselect_b32 s0, s2, s0
+; GFX9-NEXT:    v_cmp_u_f64_e64 s[6:7], s[0:1], s[0:1]
+; GFX9-NEXT:    s_and_b64 s[6:7], s[6:7], exec
+; GFX9-NEXT:    s_cselect_b32 s3, s1, s3
+; GFX9-NEXT:    s_cselect_b32 s2, s0, s2
+; GFX9-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9-NEXT:    v_min_f64 v[0:1], s[0:1], v[0:1]
+; GFX9-NEXT:    v_mov_b32_e32 v2, s5
+; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, s[4:5], s[4:5]
+; GFX9-NEXT:    v_mov_b32_e32 v4, s4
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v4, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc
-; GFX9-NEXT:    v_readfirstlane_b32 s1, v0
-; GFX9-NEXT:    v_readfirstlane_b32 s0, v1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v2, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX9-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX9-NEXT:    ; return to shader part epilog
   %max0 = call double @llvm.minimum.f64(double %a, double %b)
   %max1 = call double @llvm.minimum.f64(double %max0, double %c)
@@ -3272,17 +4016,25 @@ define double @v_fminimum3_f64_fabs0(double %a, double %b, double %c) {
 ; GFX9-LABEL: v_fminimum3_f64_fabs0:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f64 v[6:7], |v[0:1]|, v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, |v[0:1]|, v[2:3]
+; GFX9-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %a.fabs = call double @llvm.fabs.f64(double %a)
   %max0 = call double @llvm.minimum.f64(double %a.fabs, double %b)
@@ -3306,17 +4058,25 @@ define double @v_fminimum3_f64_fabs1(double %a, double %b, double %c) {
 ; GFX9-LABEL: v_fminimum3_f64_fabs1:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f64 v[6:7], v[0:1], |v[2:3]|
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], |v[2:3]|
+; GFX9-NEXT:    v_and_b32_e32 v6, 0x7fffffff, v3
+; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, |v[2:3]|, |v[2:3]|
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v6, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %b.fabs = call double @llvm.fabs.f64(double %b)
   %max0 = call double @llvm.minimum.f64(double %a, double %b.fabs)
@@ -3340,17 +4100,25 @@ define double @v_fminimum3_f64_fabs2(double %a, double %b, double %c) {
 ; GFX9-LABEL: v_fminimum3_f64_fabs2:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    v_and_b32_e32 v6, 0x7fffffff, v5
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], |v[4:5]|
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, |v[4:5]|, |v[4:5]|
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v6, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %c.fabs = call double @llvm.fabs.f64(double %c)
   %max0 = call double @llvm.minimum.f64(double %a, double %b)
@@ -3374,17 +4142,27 @@ define double @v_fminimum3_f64_fabs_all(double %a, double %b, double %c) {
 ; GFX9-LABEL: v_fminimum3_f64_fabs_all:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f64 v[6:7], |v[0:1]|, |v[2:3]|
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[2:3]|
+; GFX9-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
+; GFX9-NEXT:    v_and_b32_e32 v6, 0x7fffffff, v3
+; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, |v[2:3]|, |v[2:3]|
+; GFX9-NEXT:    v_and_b32_e32 v7, 0x7fffffff, v5
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], |v[4:5]|
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v6, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, |v[4:5]|, |v[4:5]|
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %a.fabs = call double @llvm.fabs.f64(double %a)
   %b.fabs = call double @llvm.fabs.f64(double %b)
@@ -3410,17 +4188,27 @@ define double @v_fminimum3_f64_fneg_all(double %a, double %b, double %c) {
 ; GFX9-LABEL: v_fminimum3_f64_fneg_all:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f64 v[6:7], -v[0:1], -v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -v[0:1], -v[2:3]
+; GFX9-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
+; GFX9-NEXT:    v_xor_b32_e32 v6, 0x80000000, v3
+; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -v[2:3], -v[2:3]
+; GFX9-NEXT:    v_xor_b32_e32 v7, 0x80000000, v5
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], -v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v6, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -v[4:5], -v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %a.fneg = fneg double %a
   %b.fneg = fneg double %b
@@ -3446,17 +4234,27 @@ define double @v_fminimum3_f64_fneg_fabs_all(double %a, double %b, double %c) {
 ; GFX9-LABEL: v_fminimum3_f64_fneg_fabs_all:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f64 v[6:7], -|v[0:1]|, -|v[2:3]|
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -|v[0:1]|, -|v[2:3]|
+; GFX9-NEXT:    v_or_b32_e32 v1, 0x80000000, v1
+; GFX9-NEXT:    v_or_b32_e32 v6, 0x80000000, v3
+; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -|v[2:3]|, -|v[2:3]|
+; GFX9-NEXT:    v_or_b32_e32 v7, 0x80000000, v5
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], -|v[4:5]|
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], -|v[4:5]|
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v6, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -|v[4:5]|, -|v[4:5]|
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %a.fabs = call double @llvm.fabs.f64(double %a)
   %b.fabs = call double @llvm.fabs.f64(double %b)
@@ -3485,17 +4283,25 @@ define double @v_fminimum3_f64_fneg0(double %a, double %b, double %c) {
 ; GFX9-LABEL: v_fminimum3_f64_fneg0:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f64 v[6:7], -v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -v[0:1], v[2:3]
+; GFX9-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %a.fneg = fneg double %a
   %max0 = call double @llvm.minimum.f64(double %a.fneg, double %b)
@@ -3519,17 +4325,25 @@ define double @v_fminimum3_f64_fneg1(double %a, double %b, double %c) {
 ; GFX9-LABEL: v_fminimum3_f64_fneg1:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f64 v[6:7], v[0:1], -v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], -v[2:3]
+; GFX9-NEXT:    v_xor_b32_e32 v6, 0x80000000, v3
+; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -v[2:3], -v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v6, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %b.fneg = fneg double %b
   %max0 = call double @llvm.minimum.f64(double %a, double %b.fneg)
@@ -3553,17 +4367,25 @@ define double @v_fminimum3_f64_fneg2(double %a, double %b, double %c) {
 ; GFX9-LABEL: v_fminimum3_f64_fneg2:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    v_xor_b32_e32 v6, 0x80000000, v5
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -v[4:5], -v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], -v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v6, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %c.fneg = fneg double %c
   %max0 = call double @llvm.minimum.f64(double %a, double %b)
@@ -3587,19 +4409,21 @@ define double @v_fminimum3_f64_const0(double %b, double %c) {
 ; GFX9-LABEL: v_fminimum3_f64_const0:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s0, 0
-; GFX9-NEXT:    s_mov_b32 s1, 0x40200000
-; GFX9-NEXT:    v_min_f64 v[4:5], v[0:1], s[0:1]
-; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
+; GFX9-NEXT:    v_mov_b32_e32 v4, 0x40200000
 ; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cndmask_b32_e32 v5, v4, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.minimum.f64(double 8.0, double %b)
   %max1 = call double @llvm.minimum.f64(double %max0, double %c)
@@ -3622,18 +4446,21 @@ define double @v_fminimum3_f64__const2(double %a, double %b) {
 ; GFX9-LABEL: v_fminimum3_f64__const2:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT:    s_mov_b32 s0, 0
-; GFX9-NEXT:    s_mov_b32 s1, 0x40200000
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], s[0:1]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0x40200000
 ; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v2, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.minimum.f64(double %a, double %b)
   %max1 = call double @llvm.minimum.f64(double %max0, double 8.0)
@@ -3656,17 +4483,21 @@ define double @v_fminimum3_f64_inlineimm0(double %b, double %c) {
 ; GFX9-LABEL: v_fminimum3_f64_inlineimm0:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f64 v[4:5], v[0:1], 4.0
-; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
+; GFX9-NEXT:    v_mov_b32_e32 v4, 0x40100000
 ; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cndmask_b32_e32 v5, v4, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.minimum.f64(double 4.0, double %b)
   %max1 = call double @llvm.minimum.f64(double %max0, double %c)
@@ -3689,17 +4520,21 @@ define double @v_fminimum3_f64__inlineimm(double %a, double %b) {
 ; GFX9-LABEL: v_fminimum3_f64__inlineimm:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], 4.0
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0x40100000
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v2, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.minimum.f64(double %a, double %b)
   %max1 = call double @llvm.minimum.f64(double %max0, double 4.0)
@@ -3722,20 +4557,18 @@ define double @v_fminimum3_f64_const1_const2(double %a) {
 ; GFX9-LABEL: v_fminimum3_f64_const1_const2:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s0, 0
-; GFX9-NEXT:    s_mov_b32 s1, 0x40200000
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], s[0:1]
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7ff80000
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0x40200000
 ; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
-; GFX9-NEXT:    s_mov_b32 s0, 0
-; GFX9-NEXT:    s_mov_b32 s1, 0x40300000
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], s[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v2, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0x40300000
 ; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v2, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.minimum.f64(double %a, double 8.0)
   %max1 = call double @llvm.minimum.f64(double %max0, double 16.0)
@@ -3758,15 +4591,20 @@ define <2 x float> @v_no_fminimum3_f32__multi_use(float %a, float %b, float %c)
 ; GFX942-LABEL: v_no_fminimum3_f32__multi_use:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f32_e32 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v1, vcc
+; GFX942-NEXT:    v_min_f32_e32 v1, v1, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_no_fminimum3_f32__multi_use:
@@ -3792,17 +4630,24 @@ define amdgpu_ps <2 x i32> @s_no_fminimum3_f32__multi_use(float inreg %a, float
 ;
 ; GFX942-LABEL: s_no_fminimum3_f32__multi_use:
 ; GFX942:       ; %bb.0:
-; GFX942-NEXT:    v_mov_b32_e32 v0, s1
-; GFX942-NEXT:    v_min_f32_e32 v1, s0, v0
-; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v0, s0
+; GFX942-NEXT:    v_mov_b32_e32 v1, s1
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, s1, s1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX942-NEXT:    v_min_f32_e32 v1, s2, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, s2, v0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v1, s2
+; GFX942-NEXT:    v_cmp_u_f32_e64 vcc, s2, s2
 ; GFX942-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX942-NEXT:    v_min_f32_e32 v1, v2, v1
 ; GFX942-NEXT:    s_nop 0
 ; GFX942-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX942-NEXT:    ; return to shader part epilog
@@ -3856,15 +4701,20 @@ define <2 x half> @v_no_fminimum3_f16__multi_use(half %a, half %b, half %c) {
 ; GFX942-LABEL: v_no_fminimum3_f16__multi_use:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_min_f16_e32 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX942-NEXT:    v_min_f16_e32 v1, v0, v2
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v1, vcc
+; GFX942-NEXT:    v_min_f16_e32 v1, v1, v2
 ; GFX942-NEXT:    v_pack_b32_f16 v0, v0, v1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -3896,19 +4746,25 @@ define amdgpu_ps <2 x i32> @s_no_fminimum3_f16__multi_use(half inreg %a, half in
 ;
 ; GFX942-LABEL: s_no_fminimum3_f16__multi_use:
 ; GFX942:       ; %bb.0:
-; GFX942-NEXT:    v_mov_b32_e32 v0, s1
-; GFX942-NEXT:    v_min_f16_e32 v1, s0, v0
-; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, s0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v0, s0
+; GFX942-NEXT:    v_mov_b32_e32 v1, s1
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, s1, s1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX942-NEXT:    v_min_f16_e32 v1, s2, v0
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, s2, v0
-; GFX942-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX942-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v1, s2
+; GFX942-NEXT:    v_cmp_u_f16_e64 vcc, s2, s2
 ; GFX942-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX942-NEXT:    v_min_f16_e32 v1, v2, v1
+; GFX942-NEXT:    s_nop 0
 ; GFX942-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX942-NEXT:    ; return to shader part epilog
 ;
@@ -3952,24 +4808,39 @@ define <4 x half> @v_no_fminimum3_v2f16__multi_use(<2 x half> %a, <2 x half> %b,
 ; GFX942-LABEL: v_no_fminimum3_v2f16__multi_use:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_pk_min_f16 v3, v0, v1
-; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
 ; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_sdwa v4, v0, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v3, v1, s0
+; GFX942-NEXT:    v_perm_b32 v0, v4, v0, s0
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v1
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v4, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v0, v1, v5, s0
-; GFX942-NEXT:    v_pk_min_f16 v3, v0, v2
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v5, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v0, v2, vcc
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v2, v1, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v4, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX942-NEXT:    v_perm_b32 v1, v1, v5, s0
+; GFX942-NEXT:    v_cndmask_b32_sdwa v4, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    v_perm_b32 v1, v4, v1, s0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v2, v3, s0
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v2
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_no_fminimum3_v2f16__multi_use:
@@ -4001,17 +4872,24 @@ define <2 x double> @v_no_fminimum3_f64__multi_use(double %a, double %b, double
 ; GFX9-LABEL: v_no_fminimum3_f64__multi_use:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v1, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v5, v5, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX9-NEXT:    v_min_f64 v[2:3], v[2:3], v[4:5]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.minimum.f64(double %a, double %b)
   %max1 = call double @llvm.minimum.f64(double %max0, double %c)
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index 833be2066cd54..bfb24a1970a8f 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -1308,10 +1308,11 @@ define float @v_fneg_minimum_f32_ieee(float %a, float %b) #0 {
 ; GCN-LABEL: v_fneg_minimum_f32_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_max_f32_e64 v2, -v0, -v1
-; GCN-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v1
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 vcc, -v1, -v1
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 vcc, -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GCN-NEXT:    v_max_f32_e64 v0, -v0, -v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimum.f32(float %a, float %b)
   %fneg = fneg float %min
@@ -1322,10 +1323,11 @@ define float @v_fneg_minimum_f32_no_ieee(float %a, float %b) #4 {
 ; GCN-LABEL: v_fneg_minimum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_max_f32_e64 v2, -v0, -v1
-; GCN-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v1
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 vcc, -v1, -v1
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 vcc, -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GCN-NEXT:    v_max_f32_e64 v0, -v0, -v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimum.f32(float %a, float %b)
   %fneg = fneg float %min
@@ -1358,10 +1360,9 @@ define float @v_fneg_posk_minimum_f32_ieee(float %a) #0 {
 ; GCN-LABEL: v_fneg_posk_minimum_f32_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_max_f32_e64 v1, -v0, -4.0
-; GCN-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 s[4:5], -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e64 v1, -4.0, -v0, s[4:5]
+; GCN-NEXT:    v_max_f32_e64 v0, -v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimum.f32(float 4.0, float %a)
   %fneg = fneg float %min
@@ -1372,10 +1373,9 @@ define float @v_fneg_posk_minimum_f32_no_ieee(float %a) #4 {
 ; GCN-LABEL: v_fneg_posk_minimum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_max_f32_e64 v1, -v0, -4.0
-; GCN-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 s[4:5], -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e64 v1, -4.0, -v0, s[4:5]
+; GCN-NEXT:    v_max_f32_e64 v0, -v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimum.f32(float 4.0, float %a)
   %fneg = fneg float %min
@@ -1386,10 +1386,9 @@ define float @v_fneg_negk_minimum_f32_ieee(float %a) #0 {
 ; GCN-LABEL: v_fneg_negk_minimum_f32_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_max_f32_e64 v1, -v0, 4.0
-; GCN-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 s[4:5], -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e64 v1, 4.0, -v0, s[4:5]
+; GCN-NEXT:    v_max_f32_e64 v0, -v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimum.f32(float -4.0, float %a)
   %fneg = fneg float %min
@@ -1400,10 +1399,9 @@ define float @v_fneg_negk_minimum_f32_no_ieee(float %a) #4 {
 ; GCN-LABEL: v_fneg_negk_minimum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_max_f32_e64 v1, -v0, 4.0
-; GCN-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 s[4:5], -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e64 v1, 4.0, -v0, s[4:5]
+; GCN-NEXT:    v_max_f32_e64 v0, -v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimum.f32(float -4.0, float %a)
   %fneg = fneg float %min
@@ -1426,10 +1424,9 @@ define float @v_fneg_neg0_minimum_f32_ieee(float %a) #0 {
 ; GCN-LABEL: v_fneg_neg0_minimum_f32_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_max_f32_e64 v1, -v0, 0
-; GCN-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 s[4:5], -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e64 v1, 0, -v0, s[4:5]
+; GCN-NEXT:    v_max_f32_e64 v0, -v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimum.f32(float -0.0, float %a)
   %fneg = fneg float %min
@@ -1440,21 +1437,18 @@ define float @v_fneg_inv2pi_minimum_f32(float %a) #0 {
 ; SI-LABEL: v_fneg_inv2pi_minimum_f32:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    s_mov_b32 s4, 0xbe22f983
-; SI-NEXT:    v_max_f32_e64 v1, -v0, s4
-; SI-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; SI-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
-; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; SI-NEXT:    v_mov_b32_e32 v1, 0xbe22f983
+; SI-NEXT:    v_cmp_u_f32_e64 s[4:5], -v0, -v0
+; SI-NEXT:    v_cndmask_b32_e64 v1, v1, -v0, s[4:5]
+; SI-NEXT:    v_max_f32_e64 v0, -v0, v1
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-LABEL: v_fneg_inv2pi_minimum_f32:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_min_f32_e32 v1, 0.15915494, v0
-; VI-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; VI-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; VI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
+; VI-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; VI-NEXT:    v_cndmask_b32_e32 v1, 0.15915494, v0, vcc
+; VI-NEXT:    v_max_f32_e64 v0, -v0, -v1
 ; VI-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimum.f32(float 0x3FC45F3060000000, float %a)
   %fneg = fneg float %min
@@ -1465,20 +1459,18 @@ define float @v_fneg_neg_inv2pi_minimum_f32(float %a) #0 {
 ; SI-LABEL: v_fneg_neg_inv2pi_minimum_f32:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    s_mov_b32 s4, 0x3e22f983
-; SI-NEXT:    v_max_f32_e64 v1, -v0, s4
-; SI-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; SI-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
-; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; SI-NEXT:    v_mov_b32_e32 v1, 0x3e22f983
+; SI-NEXT:    v_cmp_u_f32_e64 s[4:5], -v0, -v0
+; SI-NEXT:    v_cndmask_b32_e64 v1, v1, -v0, s[4:5]
+; SI-NEXT:    v_max_f32_e64 v0, -v0, v1
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-LABEL: v_fneg_neg_inv2pi_minimum_f32:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_max_f32_e64 v1, -v0, 0.15915494
-; VI-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; VI-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
-; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-NEXT:    v_cmp_u_f32_e64 s[4:5], -v0, -v0
+; VI-NEXT:    v_cndmask_b32_e64 v1, 0.15915494, -v0, s[4:5]
+; VI-NEXT:    v_max_f32_e64 v0, -v0, v1
 ; VI-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimum.f32(float 0xBFC45F3060000000, float %a)
   %fneg = fneg float %min
@@ -1490,21 +1482,20 @@ define half @v_fneg_inv2pi_minimum_f16(half %a) #0 {
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
-; SI-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
+; SI-NEXT:    v_mov_b32_e32 v1, 0xbe230000
 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-NEXT:    v_max_f32_e32 v2, 0xbe230000, v0
-; SI-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; SI-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
+; SI-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; SI-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; SI-NEXT:    v_max_f32_e32 v0, v0, v1
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-LABEL: v_fneg_inv2pi_minimum_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_min_f16_e32 v1, 0.15915494, v0
-; VI-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; VI-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; VI-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
+; VI-NEXT:    v_mov_b32_e32 v1, 0x3118
+; VI-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; VI-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; VI-NEXT:    v_max_f16_e64 v0, -v0, -v1
 ; VI-NEXT:    s_setpc_b64 s[30:31]
   %min = call half @llvm.minimum.f16(half 0xH3118, half %a)
   %fneg = fneg half %min
@@ -1516,20 +1507,20 @@ define half @v_fneg_neg_inv2pi_minimum_f16(half %a) #0 {
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
-; SI-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
+; SI-NEXT:    v_mov_b32_e32 v1, 0x3e230000
 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-NEXT:    v_max_f32_e32 v2, 0x3e230000, v0
-; SI-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; SI-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
+; SI-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; SI-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; SI-NEXT:    v_max_f32_e32 v0, v0, v1
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-LABEL: v_fneg_neg_inv2pi_minimum_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_max_f16_e64 v1, -v0, 0.15915494
-; VI-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; VI-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v0
-; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-NEXT:    v_mov_b32_e32 v1, 0xb118
+; VI-NEXT:    v_cmp_u_f16_e64 vcc, -v0, -v0
+; VI-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; VI-NEXT:    v_max_f16_e64 v0, -v0, -v1
 ; VI-NEXT:    s_setpc_b64 s[30:31]
   %min = call half @llvm.minimum.f16(half 0xHB118, half %a)
   %fneg = fneg half %min
@@ -1540,23 +1531,24 @@ define double @v_fneg_inv2pi_minimum_f64(double %a) #0 {
 ; SI-LABEL: v_fneg_inv2pi_minimum_f64:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    s_mov_b32 s4, 0x6dc9c882
-; SI-NEXT:    s_mov_b32 s5, 0xbfc45f30
-; SI-NEXT:    v_max_f64 v[2:3], -v[0:1], s[4:5]
 ; SI-NEXT:    v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
-; SI-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; SI-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; SI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
+; SI-NEXT:    v_mov_b32_e32 v3, 0xbfc45f30
+; SI-NEXT:    v_cndmask_b32_e32 v3, v3, v2, vcc
+; SI-NEXT:    v_mov_b32_e32 v2, 0x6dc9c882
+; SI-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; SI-NEXT:    v_max_f64 v[0:1], -v[0:1], v[2:3]
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-LABEL: v_fneg_inv2pi_minimum_f64:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_min_f64 v[2:3], v[0:1], 0.15915494309189532
 ; VI-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
-; VI-NEXT:    v_mov_b32_e32 v1, 0xfff80000
-; VI-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; VI-NEXT:    v_cndmask_b32_e64 v1, -v3, v1, vcc
+; VI-NEXT:    v_mov_b32_e32 v2, 0x3fc45f30
+; VI-NEXT:    v_mov_b32_e32 v4, 0x6dc9c882
+; VI-NEXT:    v_cndmask_b32_e32 v3, v2, v1, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; VI-NEXT:    v_max_f64 v[0:1], -v[0:1], -v[2:3]
 ; VI-NEXT:    s_setpc_b64 s[30:31]
   %min = call double @llvm.minimum.f64(double 0x3fc45f306dc9c882, double %a)
   %fneg = fneg double %min
@@ -1567,23 +1559,25 @@ define double @v_fneg_neg_inv2pi_minimum_f64(double %a) #0 {
 ; SI-LABEL: v_fneg_neg_inv2pi_minimum_f64:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    s_mov_b32 s4, 0x6dc9c882
-; SI-NEXT:    s_mov_b32 s5, 0x3fc45f30
-; SI-NEXT:    v_max_f64 v[2:3], -v[0:1], s[4:5]
 ; SI-NEXT:    v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
-; SI-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; SI-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; SI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
+; SI-NEXT:    v_mov_b32_e32 v3, 0x3fc45f30
+; SI-NEXT:    v_cndmask_b32_e32 v3, v3, v2, vcc
+; SI-NEXT:    v_mov_b32_e32 v2, 0x6dc9c882
+; SI-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; SI-NEXT:    v_max_f64 v[0:1], -v[0:1], v[2:3]
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-LABEL: v_fneg_neg_inv2pi_minimum_f64:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_max_f64 v[2:3], -v[0:1], 0.15915494309189532
 ; VI-NEXT:    v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
-; VI-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; VI-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; VI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
+; VI-NEXT:    v_mov_b32_e32 v3, 0x3fc45f30
+; VI-NEXT:    v_mov_b32_e32 v4, 0x6dc9c882
+; VI-NEXT:    v_cndmask_b32_e32 v3, v3, v2, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; VI-NEXT:    v_max_f64 v[0:1], -v[0:1], v[2:3]
 ; VI-NEXT:    s_setpc_b64 s[30:31]
   %min = call double @llvm.minimum.f64(double 0xbfc45f306dc9c882, double %a)
   %fneg = fneg double %min
@@ -1594,10 +1588,9 @@ define float @v_fneg_neg0_minimum_f32_no_ieee(float %a) #4 {
 ; GCN-LABEL: v_fneg_neg0_minimum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_max_f32_e64 v1, -v0, 0
-; GCN-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 s[4:5], -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e64 v1, 0, -v0, s[4:5]
+; GCN-NEXT:    v_max_f32_e64 v0, -v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimum.f32(float -0.0, float %a)
   %fneg = fneg float %min
@@ -1608,11 +1601,10 @@ define float @v_fneg_0_minimum_foldable_use_f32_ieee(float %a, float %b) #0 {
 ; GCN-LABEL: v_fneg_0_minimum_foldable_use_f32_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_min_f32_e32 v2, 0, v0
-; GCN-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GCN-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GCN-NEXT:    v_mul_f32_e64 v0, -v0, v1
+; GCN-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v2, 0, v0, vcc
+; GCN-NEXT:    v_max_f32_e64 v0, -v0, -v2
+; GCN-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimum.f32(float 0.0, float %a)
   %fneg = fneg float %min
@@ -1624,22 +1616,20 @@ define float @v_fneg_inv2pi_minimum_foldable_use_f32(float %a, float %b) #0 {
 ; SI-LABEL: v_fneg_inv2pi_minimum_foldable_use_f32:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    s_mov_b32 s4, 0xbe22f983
-; SI-NEXT:    v_max_f32_e64 v2, -v0, s4
-; SI-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; SI-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
-; SI-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; SI-NEXT:    v_mov_b32_e32 v2, 0xbe22f983
+; SI-NEXT:    v_cmp_u_f32_e64 s[4:5], -v0, -v0
+; SI-NEXT:    v_cndmask_b32_e64 v2, v2, -v0, s[4:5]
+; SI-NEXT:    v_max_f32_e64 v0, -v0, v2
 ; SI-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-LABEL: v_fneg_inv2pi_minimum_foldable_use_f32:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_min_f32_e32 v2, 0.15915494, v0
-; VI-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; VI-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; VI-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; VI-NEXT:    v_mul_f32_e64 v0, -v0, v1
+; VI-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; VI-NEXT:    v_cndmask_b32_e32 v2, 0.15915494, v0, vcc
+; VI-NEXT:    v_max_f32_e64 v0, -v0, -v2
+; VI-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; VI-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimum.f32(float 0x3FC45F3060000000, float %a)
   %fneg = fneg float %min
@@ -1651,11 +1641,10 @@ define float @v_fneg_0_minimum_foldable_use_f32_no_ieee(float %a, float %b) #4 {
 ; GCN-LABEL: v_fneg_0_minimum_foldable_use_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_min_f32_e32 v2, 0, v0
-; GCN-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GCN-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GCN-NEXT:    v_mul_f32_e64 v0, -v0, v1
+; GCN-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v2, 0, v0, vcc
+; GCN-NEXT:    v_max_f32_e64 v0, -v0, -v2
+; GCN-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimum.f32(float 0.0, float %a)
   %fneg = fneg float %min
@@ -1667,10 +1656,11 @@ define { float, float } @v_fneg_minimum_multi_use_minimum_f32_ieee(float %a, flo
 ; GCN-LABEL: v_fneg_minimum_multi_use_minimum_f32_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_max_f32_e64 v2, -v0, -v1
-; GCN-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v1
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 vcc, -v1, -v1
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 vcc, -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GCN-NEXT:    v_max_f32_e64 v0, -v0, -v1
 ; GCN-NEXT:    v_mul_f32_e32 v1, -4.0, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimum.f32(float %a, float %b)
@@ -1685,10 +1675,11 @@ define <2 x float> @v_fneg_minimum_multi_use_minimum_f32_no_ieee(float %a, float
 ; GCN-LABEL: v_fneg_minimum_multi_use_minimum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_max_f32_e64 v2, -v0, -v1
-; GCN-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v1
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 vcc, -v1, -v1
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 vcc, -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GCN-NEXT:    v_max_f32_e64 v0, -v0, -v1
 ; GCN-NEXT:    v_mul_f32_e32 v1, -4.0, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimum.f32(float %a, float %b)
@@ -1707,10 +1698,11 @@ define float @v_fneg_maximum_f32_ieee(float %a, float %b) #0 {
 ; GCN-LABEL: v_fneg_maximum_f32_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_min_f32_e64 v2, -v0, -v1
-; GCN-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v1
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 vcc, -v1, -v1
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 vcc, -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GCN-NEXT:    v_min_f32_e64 v0, -v0, -v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %max = call float @llvm.maximum.f32(float %a, float %b)
   %fneg = fneg float %max
@@ -1721,10 +1713,11 @@ define float @v_fneg_maximum_f32_no_ieee(float %a, float %b) #4 {
 ; GCN-LABEL: v_fneg_maximum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_min_f32_e64 v2, -v0, -v1
-; GCN-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v1
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 vcc, -v1, -v1
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 vcc, -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GCN-NEXT:    v_min_f32_e64 v0, -v0, -v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %max = call float @llvm.maximum.f32(float %a, float %b)
   %fneg = fneg float %max
@@ -1757,10 +1750,9 @@ define float @v_fneg_posk_maximum_f32_ieee(float %a) #0 {
 ; GCN-LABEL: v_fneg_posk_maximum_f32_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_min_f32_e64 v1, -v0, -4.0
-; GCN-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 s[4:5], -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e64 v1, -4.0, -v0, s[4:5]
+; GCN-NEXT:    v_min_f32_e64 v0, -v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %max = call float @llvm.maximum.f32(float 4.0, float %a)
   %fneg = fneg float %max
@@ -1771,10 +1763,9 @@ define float @v_fneg_posk_maximum_f32_no_ieee(float %a) #4 {
 ; GCN-LABEL: v_fneg_posk_maximum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_min_f32_e64 v1, -v0, -4.0
-; GCN-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 s[4:5], -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e64 v1, -4.0, -v0, s[4:5]
+; GCN-NEXT:    v_min_f32_e64 v0, -v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %max = call float @llvm.maximum.f32(float 4.0, float %a)
   %fneg = fneg float %max
@@ -1785,10 +1776,9 @@ define float @v_fneg_negk_maximum_f32_ieee(float %a) #0 {
 ; GCN-LABEL: v_fneg_negk_maximum_f32_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_min_f32_e64 v1, -v0, 4.0
-; GCN-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 s[4:5], -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e64 v1, 4.0, -v0, s[4:5]
+; GCN-NEXT:    v_min_f32_e64 v0, -v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %max = call float @llvm.maximum.f32(float -4.0, float %a)
   %fneg = fneg float %max
@@ -1799,10 +1789,9 @@ define float @v_fneg_negk_maximum_f32_no_ieee(float %a) #4 {
 ; GCN-LABEL: v_fneg_negk_maximum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_min_f32_e64 v1, -v0, 4.0
-; GCN-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 s[4:5], -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e64 v1, 4.0, -v0, s[4:5]
+; GCN-NEXT:    v_min_f32_e64 v0, -v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %max = call float @llvm.maximum.f32(float -4.0, float %a)
   %fneg = fneg float %max
@@ -1825,10 +1814,9 @@ define float @v_fneg_neg0_maximum_f32_ieee(float %a) #0 {
 ; GCN-LABEL: v_fneg_neg0_maximum_f32_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_min_f32_e64 v1, -v0, 0
-; GCN-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 s[4:5], -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e64 v1, 0, -v0, s[4:5]
+; GCN-NEXT:    v_min_f32_e64 v0, -v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %max = call float @llvm.maximum.f32(float -0.0, float %a)
   %fneg = fneg float %max
@@ -1839,10 +1827,9 @@ define float @v_fneg_neg0_maximum_f32_no_ieee(float %a) #4 {
 ; GCN-LABEL: v_fneg_neg0_maximum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_min_f32_e64 v1, -v0, 0
-; GCN-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 s[4:5], -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e64 v1, 0, -v0, s[4:5]
+; GCN-NEXT:    v_min_f32_e64 v0, -v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %max = call float @llvm.maximum.f32(float -0.0, float %a)
   %fneg = fneg float %max
@@ -1853,11 +1840,10 @@ define float @v_fneg_0_maximum_foldable_use_f32_ieee(float %a, float %b) #0 {
 ; GCN-LABEL: v_fneg_0_maximum_foldable_use_f32_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_max_f32_e32 v2, 0, v0
-; GCN-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GCN-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GCN-NEXT:    v_mul_f32_e64 v0, -v0, v1
+; GCN-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v2, 0, v0, vcc
+; GCN-NEXT:    v_min_f32_e64 v0, -v0, -v2
+; GCN-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %max = call float @llvm.maximum.f32(float 0.0, float %a)
   %fneg = fneg float %max
@@ -1869,11 +1855,10 @@ define float @v_fneg_0_maximum_foldable_use_f32_no_ieee(float %a, float %b) #4 {
 ; GCN-LABEL: v_fneg_0_maximum_foldable_use_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_max_f32_e32 v2, 0, v0
-; GCN-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GCN-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GCN-NEXT:    v_mul_f32_e64 v0, -v0, v1
+; GCN-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v2, 0, v0, vcc
+; GCN-NEXT:    v_min_f32_e64 v0, -v0, -v2
+; GCN-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %max = call float @llvm.maximum.f32(float 0.0, float %a)
   %fneg = fneg float %max
@@ -1885,10 +1870,11 @@ define { float, float } @v_fneg_maximum_multi_use_maximum_f32_ieee(float %a, flo
 ; GCN-LABEL: v_fneg_maximum_multi_use_maximum_f32_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_min_f32_e64 v2, -v0, -v1
-; GCN-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v1
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 vcc, -v1, -v1
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 vcc, -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GCN-NEXT:    v_min_f32_e64 v0, -v0, -v1
 ; GCN-NEXT:    v_mul_f32_e32 v1, -4.0, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %max = call float @llvm.maximum.f32(float %a, float %b)
@@ -1903,10 +1889,11 @@ define <2 x float> @v_fneg_maximum_multi_use_maximum_f32_no_ieee(float %a, float
 ; GCN-LABEL: v_fneg_maximum_multi_use_maximum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_min_f32_e64 v2, -v0, -v1
-; GCN-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v1
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 vcc, -v1, -v1
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-NEXT:    v_cmp_u_f32_e64 vcc, -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GCN-NEXT:    v_min_f32_e64 v0, -v0, -v1
 ; GCN-NEXT:    v_mul_f32_e32 v1, -4.0, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %max = call float @llvm.maximum.f32(float %a, float %b)
@@ -4453,6 +4440,13 @@ define float @v_fmul_0_fsub_0_safe_infloop_regression(float %arg) {
 ; SI-NSZ-NEXT:    s_brev_b32 s4, 1
 ; SI-NSZ-NEXT:    v_fma_f32 v0, v0, s4, 0
 ; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: v_fmul_0_fsub_0_safe_infloop_regression:
+; VI:       ; %bb.0: ; %bb
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_mul_f32_e32 v0, 0, v0
+; VI-NEXT:    v_sub_f32_e32 v0, 0, v0
+; VI-NEXT:    s_setpc_b64 s[30:31]
 ; FIXME: utils/update_llc_test_checks.py will generate redundant VI
 ; labels, remove them, they will cause test failure.
 bb:
diff --git a/llvm/test/CodeGen/AMDGPU/fract-match.ll b/llvm/test/CodeGen/AMDGPU/fract-match.ll
index f00657da440f0..a712333513bf0 100644
--- a/llvm/test/CodeGen/AMDGPU/fract-match.ll
+++ b/llvm/test/CodeGen/AMDGPU/fract-match.ll
@@ -3031,10 +3031,10 @@ define float @safe_math_fract_f32_minimum(float %x, ptr addrspace(1) writeonly c
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_floor_f32_e32 v3, v0
 ; GFX6-NEXT:    v_sub_f32_e32 v4, v0, v3
-; GFX6-NEXT:    v_min_f32_e32 v5, 0x3f7fffff, v4
-; GFX6-NEXT:    v_mov_b32_e32 v6, 0x7fc00000
-; GFX6-NEXT:    v_cmp_o_f32_e32 vcc, v4, v4
-; GFX6-NEXT:    v_cndmask_b32_e32 v4, v6, v5, vcc
+; GFX6-NEXT:    v_not_b32_e32 v5, -4.0
+; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX6-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc
+; GFX6-NEXT:    v_min_f32_e32 v4, v4, v5
 ; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX6-NEXT:    s_mov_b32 s8, 0x7f800000
 ; GFX6-NEXT:    s_mov_b32 s6, 0
@@ -3146,10 +3146,10 @@ define float @safe_math_fract_f32_minimum_swap(float %x, ptr addrspace(1) writeo
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_floor_f32_e32 v3, v0
 ; GFX6-NEXT:    v_sub_f32_e32 v4, v0, v3
-; GFX6-NEXT:    v_min_f32_e32 v5, 0x3f7fffff, v4
-; GFX6-NEXT:    v_mov_b32_e32 v6, 0x7fc00000
-; GFX6-NEXT:    v_cmp_o_f32_e32 vcc, v4, v4
-; GFX6-NEXT:    v_cndmask_b32_e32 v4, v6, v5, vcc
+; GFX6-NEXT:    v_not_b32_e32 v5, -4.0
+; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX6-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc
+; GFX6-NEXT:    v_min_f32_e32 v4, v4, v5
 ; GFX6-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
 ; GFX6-NEXT:    s_mov_b32 s8, 0x7f800000
 ; GFX6-NEXT:    s_mov_b32 s6, 0
@@ -3531,10 +3531,10 @@ define float @basic_fract_f32_nonans_minimum(float nofpclass(nan) %x) {
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_floor_f32_e32 v1, v0
 ; GFX6-NEXT:    v_sub_f32_e32 v0, v0, v1
-; GFX6-NEXT:    v_min_f32_e32 v1, 0x3f7fffff, v0
-; GFX6-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX6-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX6-NEXT:    v_not_b32_e32 v1, -4.0
+; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX6-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX7-LABEL: basic_fract_f32_nonans_minimum:
@@ -3760,10 +3760,10 @@ define float @basic_fract_f32_flags_minimum(float %x) {
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_floor_f32_e32 v1, v0
 ; GFX6-NEXT:    v_sub_f32_e32 v0, v0, v1
-; GFX6-NEXT:    v_min_f32_e32 v1, 0x3f7fffff, v0
-; GFX6-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX6-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX6-NEXT:    v_not_b32_e32 v1, -4.0
+; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX6-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX7-LABEL: basic_fract_f32_flags_minimum:
@@ -3771,10 +3771,10 @@ define float @basic_fract_f32_flags_minimum(float %x) {
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_floor_f32_e32 v1, v0
 ; GFX7-NEXT:    v_sub_f32_e32 v0, v0, v1
-; GFX7-NEXT:    v_min_f32_e32 v1, 0x3f7fffff, v0
-; GFX7-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX7-NEXT:    v_not_b32_e32 v1, -4.0
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: basic_fract_f32_flags_minimum:
@@ -3782,10 +3782,10 @@ define float @basic_fract_f32_flags_minimum(float %x) {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_floor_f32_e32 v1, v0
 ; GFX8-NEXT:    v_sub_f32_e32 v0, v0, v1
-; GFX8-NEXT:    v_min_f32_e32 v1, 0x3f7fffff, v0
-; GFX8-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX8-NEXT:    v_not_b32_e32 v1, -4.0
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: basic_fract_f32_flags_minimum:
@@ -3794,10 +3794,10 @@ define float @basic_fract_f32_flags_minimum(float %x) {
 ; GFX11-NEXT:    v_floor_f32_e32 v1, v0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_sub_f32_e32 v0, v0, v1
-; GFX11-NEXT:    v_min_f32_e32 v1, 0x3f7fffff, v0
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x3f7fffff, v0, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: basic_fract_f32_flags_minimum:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
index 7fd70de81af6f..70c2b9d67fce4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
@@ -15,30 +15,33 @@ define half @v_maximum_f16(half %src0, half %src1) {
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT:    v_max_f32_e32 v3, v0, v1
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_f16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_f16:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_f16:
@@ -50,27 +53,33 @@ define half @v_maximum_f16(half %src0, half %src1) {
 ; GFX10-LABEL: v_maximum_f16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: v_maximum_f16:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
-; GFX11-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v1.l
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.h
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_maximum_f16:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-TRUE16-LABEL: v_maximum_f16:
@@ -166,30 +175,33 @@ define half @v_maximum_f16__nsz(half %src0, half %src1) {
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT:    v_max_f32_e32 v3, v0, v1
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_f16__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_f16__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_f16__nsz:
@@ -201,27 +213,33 @@ define half @v_maximum_f16__nsz(half %src0, half %src1) {
 ; GFX10-LABEL: v_maximum_f16__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: v_maximum_f16__nsz:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
-; GFX11-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v1.l
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.h
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_maximum_f16__nsz:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-TRUE16-LABEL: v_maximum_f16__nsz:
@@ -317,33 +335,36 @@ define half @v_maximum_f16__nnan_src0(half %arg0, half %src1) {
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; GFX7-NEXT:    v_add_f32_e32 v0, 1.0, v0
-; GFX7-NEXT:    v_max_f32_e32 v3, v0, v1
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_f16__nnan_src0:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_add_f16_e32 v0, 1.0, v0
-; GFX8-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_f16__nnan_src0:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_add_f16_e32 v0, 1.0, v0
-; GFX900-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_f16__nnan_src0:
@@ -357,29 +378,37 @@ define half @v_maximum_f16__nnan_src0(half %arg0, half %src1) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_add_f16_e32 v0, 1.0, v0
-; GFX10-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: v_maximum_f16__nnan_src0:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.l, 1.0, v0.l
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
-; GFX11-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v1.l
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.h
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_maximum_f16__nnan_src0:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, 1.0, v0
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-FAKE16-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-TRUE16-LABEL: v_maximum_f16__nnan_src0:
@@ -416,33 +445,30 @@ define half @v_maximum_f16__nnan_src1(half %src0, half %arg1) {
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; GFX7-NEXT:    v_add_f32_e32 v1, 1.0, v1
-; GFX7-NEXT:    v_max_f32_e32 v3, v0, v1
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_f16__nnan_src1:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_add_f16_e32 v1, 1.0, v1
-; GFX8-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_f16__nnan_src1:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_add_f16_e32 v1, 1.0, v1
-; GFX900-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_f16__nnan_src1:
@@ -456,29 +482,29 @@ define half @v_maximum_f16__nnan_src1(half %src0, half %arg1) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_add_f16_e32 v1, 1.0, v1
-; GFX10-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: v_maximum_f16__nnan_src1:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.h, 1.0, v1.l
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v0.l, vcc_lo
 ; GFX11-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.h
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_maximum_f16__nnan_src1:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    v_add_f16_e32 v1, 1.0, v1
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-FAKE16-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-TRUE16-LABEL: v_maximum_f16__nnan_src1:
@@ -515,12 +541,13 @@ define void @s_maximum_f16(half inreg %src0, half inreg %src1) {
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, s17
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, s16
-; GFX7-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT:    v_max_f32_e32 v3, v1, v0
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v0
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v1, v0
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7-NEXT:    ;;#ASMSTART
 ; GFX7-NEXT:    ; use v0
@@ -530,12 +557,13 @@ define void @s_maximum_f16(half inreg %src0, half inreg %src1) {
 ; GFX8-LABEL: s_maximum_f16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, s17
-; GFX8-NEXT:    v_max_f16_e32 v1, s16, v0
-; GFX8-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, s16, v0
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX8-NEXT:    v_mov_b32_e32 v0, s16
+; GFX8-NEXT:    v_mov_b32_e32 v1, s17
+; GFX8-NEXT:    v_cmp_u_f16_e64 vcc, s17, s17
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX8-NEXT:    ;;#ASMSTART
 ; GFX8-NEXT:    ; use v0
 ; GFX8-NEXT:    ;;#ASMEND
@@ -544,12 +572,13 @@ define void @s_maximum_f16(half inreg %src0, half inreg %src1) {
 ; GFX900-LABEL: s_maximum_f16:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_mov_b32_e32 v0, s17
-; GFX900-NEXT:    v_max_f16_e32 v1, s16, v0
-; GFX900-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, s16, v0
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX900-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX900-NEXT:    v_mov_b32_e32 v0, s16
+; GFX900-NEXT:    v_mov_b32_e32 v1, s17
+; GFX900-NEXT:    v_cmp_u_f16_e64 vcc, s17, s17
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; use v0
 ; GFX900-NEXT:    ;;#ASMEND
@@ -570,9 +599,12 @@ define void @s_maximum_f16(half inreg %src0, half inreg %src1) {
 ; GFX10-LABEL: s_maximum_f16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f16_e64 v0, s16, s17
-; GFX10-NEXT:    v_cmp_o_f16_e64 vcc_lo, s16, s17
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
+; GFX10-NEXT:    v_mov_b32_e32 v0, s17
+; GFX10-NEXT:    v_cmp_u_f16_e64 vcc_lo, s17, s17
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, s16, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, s17, v0, vcc_lo
+; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ; use v0
@@ -582,10 +614,14 @@ define void @s_maximum_f16(half inreg %src0, half inreg %src1) {
 ; GFX11-TRUE16-LABEL: s_maximum_f16:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, s0, s1
-; GFX11-TRUE16-NEXT:    v_max_f16_e64 v0.l, s0, s1
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, s1, s1
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, s1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, s0, v0.l, s2
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, s1, v0.l, vcc_lo
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, s2
+; GFX11-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.h
 ; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX11-TRUE16-NEXT:    ;;#ASMSTART
 ; GFX11-TRUE16-NEXT:    ; use v0
@@ -595,10 +631,14 @@ define void @s_maximum_f16(half inreg %src0, half inreg %src1) {
 ; GFX11-FAKE16-LABEL: s_maximum_f16:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_max_f16_e64 v0, s0, s1
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s1
+; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, s1
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e64 vcc_lo, s1, s1
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, s0, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, s1, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX11-FAKE16-NEXT:    ;;#ASMSTART
 ; GFX11-FAKE16-NEXT:    ; use v0
@@ -640,41 +680,51 @@ define <2 x half> @v_maximum_v2f16(<2 x half> %src0, <2 x half> %src1) {
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX7-NEXT:    v_max_f32_e32 v4, v0, v2
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX7-NEXT:    v_max_f32_e32 v2, v1, v3
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX7-NEXT:    v_max_f32_e32 v1, v1, v2
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v2f16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
-; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v3, v2
-; GFX8-NEXT:    v_max_f16_e32 v2, v3, v2
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX8-NEXT:    v_cndmask_b32_sdwa v2, v3, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT:    v_max_f16_e32 v4, v0, v1
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v4, vcc
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_sdwa v3, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_max_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v2f16:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_pk_max_f16 v2, v0, v1
-; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX900-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_sdwa v0, v3, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX900-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_sdwa v3, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
 ; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT:    v_perm_b32 v1, v2, v1, s4
+; GFX900-NEXT:    v_perm_b32 v0, v3, v0, s4
+; GFX900-NEXT:    v_pk_max_f16 v0, v0, v1
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v2f16:
@@ -686,40 +736,56 @@ define <2 x half> @v_maximum_v2f16(<2 x half> %src0, <2 x half> %src1) {
 ; GFX10-LABEL: v_maximum_v2f16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_pk_max_f16 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX10-NEXT:    v_cmp_o_f16_e64 s4, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_sdwa v1, v3, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0x7e00, v2, s4
-; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, v1, v1
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v0, v1, s4
+; GFX10-NEXT:    v_cndmask_b32_sdwa v0, v0, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_perm_b32 v0, v0, v3, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10-NEXT:    v_pk_max_f16 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: v_maximum_v2f16:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
-; GFX11-TRUE16-NEXT:    v_pk_max_f16 v2, v0, v1
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v0.h, v1.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.h, v1.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v1.h, vcc_lo
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v2.l, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v2.h, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.l, s0
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v1.h, v0.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, v0.l, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v0, v0, v1
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_maximum_v2f16:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_pk_max_f16 v2, v0, v1
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v3
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v5, vcc_lo
-; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v3, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v0, v0, v1
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v2f16:
@@ -802,41 +868,51 @@ define <2 x half> @v_maximum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX7-NEXT:    v_max_f32_e32 v4, v0, v2
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX7-NEXT:    v_max_f32_e32 v2, v1, v3
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX7-NEXT:    v_max_f32_e32 v1, v1, v2
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v2f16__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
-; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v3, v2
-; GFX8-NEXT:    v_max_f16_e32 v2, v3, v2
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX8-NEXT:    v_cndmask_b32_sdwa v2, v3, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT:    v_max_f16_e32 v4, v0, v1
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v4, vcc
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_sdwa v3, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_max_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v2f16__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_pk_max_f16 v2, v0, v1
-; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX900-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_sdwa v0, v3, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX900-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_sdwa v3, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
 ; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT:    v_perm_b32 v1, v2, v1, s4
+; GFX900-NEXT:    v_perm_b32 v0, v3, v0, s4
+; GFX900-NEXT:    v_pk_max_f16 v0, v0, v1
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v2f16__nsz:
@@ -848,40 +924,56 @@ define <2 x half> @v_maximum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
 ; GFX10-LABEL: v_maximum_v2f16__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_pk_max_f16 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX10-NEXT:    v_cmp_o_f16_e64 s4, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_sdwa v1, v3, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0x7e00, v2, s4
-; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, v1, v1
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v0, v1, s4
+; GFX10-NEXT:    v_cndmask_b32_sdwa v0, v0, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_perm_b32 v0, v0, v3, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10-NEXT:    v_pk_max_f16 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: v_maximum_v2f16__nsz:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
-; GFX11-TRUE16-NEXT:    v_pk_max_f16 v2, v0, v1
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v0.h, v1.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.h, v1.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v1.h, vcc_lo
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v2.l, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v2.h, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.l, s0
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v1.h, v0.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, v0.l, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v0, v0, v1
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_maximum_v2f16__nsz:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_pk_max_f16 v2, v0, v1
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v3
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v5, vcc_lo
-; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v3, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v0, v0, v1
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v2f16__nsz:
@@ -958,20 +1050,23 @@ define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, s19
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, s17
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, s18
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, s16
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, s18
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, s16
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX7-NEXT:    v_max_f32_e32 v4, v1, v0
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v0
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v3, v2
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v3, v2
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_max_f32_e32 v0, v1, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT:    v_max_f32_e32 v1, v1, v2
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
@@ -983,18 +1078,23 @@ define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX8-LABEL: s_maximum_v2f16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_lshr_b32 s4, s17, 16
-; GFX8-NEXT:    s_lshr_b32 s5, s16, 16
+; GFX8-NEXT:    s_lshr_b32 s4, s16, 16
+; GFX8-NEXT:    s_lshr_b32 s5, s17, 16
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s4
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, s5, v0
-; GFX8-NEXT:    v_max_f16_e32 v0, s5, v0
-; GFX8-NEXT:    v_mov_b32_e32 v1, 0x7e00
+; GFX8-NEXT:    v_mov_b32_e32 v1, s5
+; GFX8-NEXT:    v_cmp_u_f16_e64 vcc, s5, s5
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_max_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_mov_b32_e32 v1, s16
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s17
-; GFX8-NEXT:    v_cndmask_b32_sdwa v0, v1, v0, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT:    v_max_f16_e32 v3, s16, v2
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, s16, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_cmp_u_f16_e64 vcc, s17, s17
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v1, vcc
+; GFX8-NEXT:    v_max_f16_e32 v1, v1, v2
+; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    ;;#ASMSTART
 ; GFX8-NEXT:    ; use v0
 ; GFX8-NEXT:    ;;#ASMEND
@@ -1003,19 +1103,25 @@ define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX900-LABEL: s_maximum_v2f16:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_mov_b32_e32 v0, s17
-; GFX900-NEXT:    v_mov_b32_e32 v1, s17
-; GFX900-NEXT:    s_lshr_b32 s4, s17, 16
-; GFX900-NEXT:    v_pk_max_f16 v1, s16, v1
-; GFX900-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, s16, v0
-; GFX900-NEXT:    s_lshr_b32 s5, s16, 16
-; GFX900-NEXT:    v_mov_b32_e32 v3, s4
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, s5, v3
-; GFX900-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX900-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX900-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX900-NEXT:    s_lshr_b32 s4, s16, 16
+; GFX900-NEXT:    s_lshr_b32 s5, s17, 16
+; GFX900-NEXT:    v_mov_b32_e32 v0, s4
+; GFX900-NEXT:    v_mov_b32_e32 v1, s5
+; GFX900-NEXT:    v_cmp_u_f16_e64 vcc, s5, s5
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_mov_b32_e32 v2, s16
+; GFX900-NEXT:    v_mov_b32_e32 v3, s17
+; GFX900-NEXT:    v_cmp_u_f16_e64 vcc, s17, s17
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v2, vcc
+; GFX900-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX900-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX900-NEXT:    v_lshl_or_b32 v1, v1, 16, v3
+; GFX900-NEXT:    v_lshl_or_b32 v0, v0, 16, v2
+; GFX900-NEXT:    v_pk_max_f16 v0, v0, v1
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; use v0
 ; GFX900-NEXT:    ;;#ASMEND
@@ -1035,16 +1141,23 @@ define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX10-LABEL: s_maximum_v2f16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_mov_b32_e32 v0, s17
+; GFX10-NEXT:    v_cmp_u_f16_e64 vcc_lo, s17, s17
 ; GFX10-NEXT:    s_lshr_b32 s4, s17, 16
 ; GFX10-NEXT:    s_lshr_b32 s5, s16, 16
-; GFX10-NEXT:    v_pk_max_f16 v0, s16, s17
-; GFX10-NEXT:    v_cmp_o_f16_e64 vcc_lo, s5, s4
-; GFX10-NEXT:    v_cmp_o_f16_e64 s4, s16, s17
-; GFX10-NEXT:    v_mov_b32_e32 v1, 0x7e00
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0x7e00, v0, s4
-; GFX10-NEXT:    v_cndmask_b32_sdwa v0, v1, v0, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v2
-; GFX10-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
+; GFX10-NEXT:    v_mov_b32_e32 v1, s4
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, s16, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e64 vcc_lo, s4, s4
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, s5, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, s17, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, s4, v1, vcc_lo
+; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX10-NEXT:    v_pk_max_f16 v0, v0, v2
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ; use v0
 ; GFX10-NEXT:    ;;#ASMEND
@@ -1053,15 +1166,23 @@ define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX11-TRUE16-LABEL: s_maximum_v2f16:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_pk_max_f16 v0, s0, s1
 ; GFX11-TRUE16-NEXT:    s_lshr_b32 s2, s1, 16
-; GFX11-TRUE16-NEXT:    s_lshr_b32 s3, s0, 16
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, s0, s1
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, s3, s2
-; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, s0
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v1.l, s1
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s4, s1, s1
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s3, s2, s2
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, s2
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, s1
+; GFX11-TRUE16-NEXT:    s_lshr_b32 s5, s0, 16
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, s5, v0.l, s3
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, s0, v0.h, s4
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.h, v1.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, s2, v1.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, s1, v1.l, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v0, v1, v0
 ; GFX11-TRUE16-NEXT:    ;;#ASMSTART
 ; GFX11-TRUE16-NEXT:    ; use v0
 ; GFX11-TRUE16-NEXT:    ;;#ASMEND
@@ -1070,18 +1191,26 @@ define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX11-FAKE16-LABEL: s_maximum_v2f16:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_pk_max_f16 v0, s0, s1
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s1
 ; GFX11-FAKE16-NEXT:    s_lshr_b32 s2, s1, 16
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s2
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e64 vcc_lo, s1, s1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, s0, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e64 vcc_lo, s2, s2
 ; GFX11-FAKE16-NEXT:    s_lshr_b32 s0, s0, 16
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s2
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, s1, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
 ; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, s2, v1, vcc_lo
 ; GFX11-FAKE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v0, v0, v2
 ; GFX11-FAKE16-NEXT:    ;;#ASMSTART
 ; GFX11-FAKE16-NEXT:    ; use v0
 ; GFX11-FAKE16-NEXT:    ;;#ASMEND
@@ -1113,58 +1242,82 @@ define <3 x half> @v_maximum_v3f16(<3 x half> %src0, <3 x half> %src1) {
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT:    v_max_f32_e32 v6, v0, v3
-; GFX7-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX7-NEXT:    v_max_f32_e32 v3, v1, v4
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX7-NEXT:    v_max_f32_e32 v3, v2, v5
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX7-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_max_f32_e32 v1, v1, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX7-NEXT:    v_max_f32_e32 v2, v2, v3
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v3f16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v5, v4
-; GFX8-NEXT:    v_max_f16_e32 v4, v5, v4
-; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX8-NEXT:    v_cndmask_b32_sdwa v4, v5, v4, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT:    v_max_f16_e32 v6, v1, v3
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX8-NEXT:    v_max_f16_e32 v3, v0, v2
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_sdwa v5, v0, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_max_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX8-NEXT:    v_max_f16_e32 v1, v1, v3
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v3f16:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX900-NEXT:    v_pk_max_f16 v3, v0, v2
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX900-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_sdwa v0, v5, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_sdwa v5, v0, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
 ; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT:    v_perm_b32 v2, v4, v2, s4
+; GFX900-NEXT:    v_perm_b32 v0, v5, v0, s4
+; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX900-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX900-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX900-NEXT:    v_perm_b32 v3, v4, v3, s4
+; GFX900-NEXT:    v_perm_b32 v1, v2, v1, s4
+; GFX900-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v3f16:
@@ -1177,51 +1330,94 @@ define <3 x half> @v_maximum_v3f16(<3 x half> %src0, <3 x half> %src1) {
 ; GFX10-LABEL: v_maximum_v3f16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_pk_max_f16 v4, v0, v2
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX10-NEXT:    v_cmp_o_f16_e64 s4, v0, v2
-; GFX10-NEXT:    v_cndmask_b32_sdwa v2, v5, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0x7e00, v4, s4
-; GFX10-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX10-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, v3, v3
+; GFX10-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s4
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, v5, v5
+; GFX10-NEXT:    v_cndmask_b32_sdwa v0, v0, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s4
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_perm_b32 v1, v6, v1, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_perm_b32 v0, v0, v5, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v2, v4, v2, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v6, vcc_lo
+; GFX10-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX10-NEXT:    v_perm_b32 v3, v7, v3, 0x5040100
+; GFX10-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: v_maximum_v3f16:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1.l, v3.l
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v0.l, v2.l
-; GFX11-TRUE16-NEXT:    v_pk_max_f16 v4, v0, v2
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v0.h, v2.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2.h, v2.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v2.l, v2.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v3.h, v3.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v3.l, v3.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v2.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v1.h, v3.h, s1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, v3.l, s2
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v1.h, v1.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, v2.h, v0.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, v2.l, v0.l, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, v3.h, v1.h, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, v3.l, v1.l, s2
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v0, v0, v2
 ; GFX11-TRUE16-NEXT:    v_pk_max_f16 v1, v1, v3
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v4.l, s0
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v4.h, s1
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v1.l, vcc_lo
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_maximum_v3f16:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_pk_max_f16 v4, v0, v2
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
-; GFX11-FAKE16-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v5, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, v7, v6, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v4, v2, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v6, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v7, v3, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v3f16:
@@ -1311,58 +1507,82 @@ define <3 x half> @v_maximum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) {
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT:    v_max_f32_e32 v6, v0, v3
-; GFX7-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX7-NEXT:    v_max_f32_e32 v3, v1, v4
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX7-NEXT:    v_max_f32_e32 v3, v2, v5
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX7-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_max_f32_e32 v1, v1, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX7-NEXT:    v_max_f32_e32 v2, v2, v3
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v3f16__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v5, v4
-; GFX8-NEXT:    v_max_f16_e32 v4, v5, v4
-; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX8-NEXT:    v_cndmask_b32_sdwa v4, v5, v4, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT:    v_max_f16_e32 v6, v1, v3
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX8-NEXT:    v_max_f16_e32 v3, v0, v2
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_sdwa v5, v0, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_max_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX8-NEXT:    v_max_f16_e32 v1, v1, v3
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v3f16__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX900-NEXT:    v_pk_max_f16 v3, v0, v2
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX900-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_sdwa v0, v5, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_sdwa v5, v0, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
 ; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT:    v_perm_b32 v2, v4, v2, s4
+; GFX900-NEXT:    v_perm_b32 v0, v5, v0, s4
+; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX900-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX900-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX900-NEXT:    v_perm_b32 v3, v4, v3, s4
+; GFX900-NEXT:    v_perm_b32 v1, v2, v1, s4
+; GFX900-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v3f16__nsz:
@@ -1375,51 +1595,94 @@ define <3 x half> @v_maximum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) {
 ; GFX10-LABEL: v_maximum_v3f16__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_pk_max_f16 v4, v0, v2
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX10-NEXT:    v_cmp_o_f16_e64 s4, v0, v2
-; GFX10-NEXT:    v_cndmask_b32_sdwa v2, v5, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0x7e00, v4, s4
-; GFX10-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX10-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, v3, v3
+; GFX10-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s4
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, v5, v5
+; GFX10-NEXT:    v_cndmask_b32_sdwa v0, v0, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s4
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_perm_b32 v1, v6, v1, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_perm_b32 v0, v0, v5, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v2, v4, v2, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v6, vcc_lo
+; GFX10-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX10-NEXT:    v_perm_b32 v3, v7, v3, 0x5040100
+; GFX10-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: v_maximum_v3f16__nsz:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1.l, v3.l
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v0.l, v2.l
-; GFX11-TRUE16-NEXT:    v_pk_max_f16 v4, v0, v2
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v0.h, v2.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2.h, v2.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v2.l, v2.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v3.h, v3.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v3.l, v3.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v2.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v1.h, v3.h, s1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, v3.l, s2
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v1.h, v1.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, v2.h, v0.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, v2.l, v0.l, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, v3.h, v1.h, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, v3.l, v1.l, s2
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v0, v0, v2
 ; GFX11-TRUE16-NEXT:    v_pk_max_f16 v1, v1, v3
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v4.l, s0
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v4.h, s1
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v1.l, vcc_lo
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_maximum_v3f16__nsz:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_pk_max_f16 v4, v0, v2
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
-; GFX11-FAKE16-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v5, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, v7, v6, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v4, v2, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v6, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v7, v3, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v3f16__nsz:
@@ -1509,75 +1772,99 @@ define <4 x half> @v_maximum_v4f16(<4 x half> %src0, <4 x half> %src1) {
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v7, v7
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT:    v_max_f32_e32 v8, v0, v4
-; GFX7-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX7-NEXT:    v_max_f32_e32 v4, v1, v5
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX7-NEXT:    v_max_f32_e32 v4, v2, v6
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
-; GFX7-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX7-NEXT:    v_max_f32_e32 v4, v3, v7
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
-; GFX7-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_max_f32_e32 v1, v1, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX7-NEXT:    v_max_f32_e32 v3, v3, v4
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v4f16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
-; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v5, v4
-; GFX8-NEXT:    v_max_f16_e32 v4, v5, v4
-; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7e00
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_sdwa v5, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 16, v0
-; GFX8-NEXT:    v_cndmask_b32_sdwa v4, v5, v4, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT:    v_max_f16_e32 v8, v7, v6
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v7, v6
-; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
-; GFX8-NEXT:    v_max_f16_e32 v7, v1, v3
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
-; GFX8-NEXT:    v_max_f16_e32 v3, v0, v2
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v6
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_max_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_max_f16_sdwa v5, v5, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_max_f16_e32 v1, v1, v3
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v5
+; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v4f16:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX900-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_sdwa v1, v5, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX900-NEXT:    v_pk_max_f16 v3, v0, v2
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX900-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_sdwa v0, v5, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_sdwa v5, v0, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
 ; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
-; GFX900-NEXT:    v_perm_b32 v1, v1, v6, s4
+; GFX900-NEXT:    v_perm_b32 v2, v4, v2, s4
+; GFX900-NEXT:    v_perm_b32 v0, v5, v0, s4
+; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX900-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX900-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX900-NEXT:    v_perm_b32 v3, v4, v3, s4
+; GFX900-NEXT:    v_perm_b32 v1, v2, v1, s4
+; GFX900-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v4f16:
@@ -1590,62 +1877,94 @@ define <4 x half> @v_maximum_v4f16(<4 x half> %src0, <4 x half> %src1) {
 ; GFX10-LABEL: v_maximum_v4f16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_pk_max_f16 v6, v1, v3
-; GFX10-NEXT:    v_cmp_o_f16_sdwa s4, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_pk_max_f16 v5, v0, v2
-; GFX10-NEXT:    v_cmp_o_f16_e64 s5, v0, v2
-; GFX10-NEXT:    v_cndmask_b32_sdwa v2, v4, v6, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    s_mov_b32 vcc_lo, s4
-; GFX10-NEXT:    v_cndmask_b32_sdwa v4, v4, v5, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0x7e00, v5, s5
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v6, vcc_lo
-; GFX10-NEXT:    v_perm_b32 v0, v4, v0, 0x5040100
-; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, v3, v3
+; GFX10-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s4
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, v5, v5
+; GFX10-NEXT:    v_cndmask_b32_sdwa v0, v0, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s4
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_perm_b32 v1, v6, v1, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_perm_b32 v0, v0, v5, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v2, v4, v2, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v6, vcc_lo
+; GFX10-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX10-NEXT:    v_perm_b32 v3, v7, v3, 0x5040100
+; GFX10-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: v_maximum_v4f16:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1.l, v3.l
-; GFX11-TRUE16-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v0.l, v2.l
-; GFX11-TRUE16-NEXT:    v_pk_max_f16 v5, v0, v2
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v0.h, v2.h
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, v1.h, v3.h
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v4.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2.h, v2.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v2.l, v2.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v3.h, v3.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v3.l, v3.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v2.h, vcc_lo
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v5.l, s0
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v5.h, s1
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x7e00, v4.h, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v2.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v1.h, v3.h, s1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, v3.l, s2
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v1.h, v1.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, v2.h, v0.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, v2.l, v0.l, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, v3.h, v1.h, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, v3.l, v1.l, s2
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_maximum_v4f16:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
-; GFX11-FAKE16-NEXT:    v_pk_max_f16 v7, v0, v2
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v8
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v9, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
-; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v5, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, v7, v6, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v4, v2, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v6, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v7, v3, 0x5040100
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v4f16:
@@ -1742,75 +2061,99 @@ define <4 x half> @v_maximum_v4f16__nsz(<4 x half> %src0, <4 x half> %src1) {
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v7, v7
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT:    v_max_f32_e32 v8, v0, v4
-; GFX7-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX7-NEXT:    v_max_f32_e32 v4, v1, v5
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX7-NEXT:    v_max_f32_e32 v4, v2, v6
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
-; GFX7-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX7-NEXT:    v_max_f32_e32 v4, v3, v7
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
-; GFX7-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_max_f32_e32 v1, v1, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX7-NEXT:    v_max_f32_e32 v3, v3, v4
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v4f16__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
-; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v5, v4
-; GFX8-NEXT:    v_max_f16_e32 v4, v5, v4
-; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7e00
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_sdwa v5, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 16, v0
-; GFX8-NEXT:    v_cndmask_b32_sdwa v4, v5, v4, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT:    v_max_f16_e32 v8, v7, v6
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v7, v6
-; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
-; GFX8-NEXT:    v_max_f16_e32 v7, v1, v3
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
-; GFX8-NEXT:    v_max_f16_e32 v3, v0, v2
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v6
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_max_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_max_f16_sdwa v5, v5, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_max_f16_e32 v1, v1, v3
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v5
+; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v4f16__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX900-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_sdwa v1, v5, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX900-NEXT:    v_pk_max_f16 v3, v0, v2
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX900-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_sdwa v0, v5, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_sdwa v5, v0, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
 ; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
-; GFX900-NEXT:    v_perm_b32 v1, v1, v6, s4
+; GFX900-NEXT:    v_perm_b32 v2, v4, v2, s4
+; GFX900-NEXT:    v_perm_b32 v0, v5, v0, s4
+; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX900-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX900-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX900-NEXT:    v_perm_b32 v3, v4, v3, s4
+; GFX900-NEXT:    v_perm_b32 v1, v2, v1, s4
+; GFX900-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v4f16__nsz:
@@ -1823,62 +2166,94 @@ define <4 x half> @v_maximum_v4f16__nsz(<4 x half> %src0, <4 x half> %src1) {
 ; GFX10-LABEL: v_maximum_v4f16__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_pk_max_f16 v6, v1, v3
-; GFX10-NEXT:    v_cmp_o_f16_sdwa s4, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_pk_max_f16 v5, v0, v2
-; GFX10-NEXT:    v_cmp_o_f16_e64 s5, v0, v2
-; GFX10-NEXT:    v_cndmask_b32_sdwa v2, v4, v6, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    s_mov_b32 vcc_lo, s4
-; GFX10-NEXT:    v_cndmask_b32_sdwa v4, v4, v5, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0x7e00, v5, s5
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v6, vcc_lo
-; GFX10-NEXT:    v_perm_b32 v0, v4, v0, 0x5040100
-; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, v3, v3
+; GFX10-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s4
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, v5, v5
+; GFX10-NEXT:    v_cndmask_b32_sdwa v0, v0, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s4
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_perm_b32 v1, v6, v1, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_perm_b32 v0, v0, v5, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v2, v4, v2, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v6, vcc_lo
+; GFX10-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX10-NEXT:    v_perm_b32 v3, v7, v3, 0x5040100
+; GFX10-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: v_maximum_v4f16__nsz:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1.l, v3.l
-; GFX11-TRUE16-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v0.l, v2.l
-; GFX11-TRUE16-NEXT:    v_pk_max_f16 v5, v0, v2
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v0.h, v2.h
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, v1.h, v3.h
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v4.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2.h, v2.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v2.l, v2.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v3.h, v3.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v3.l, v3.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v2.h, vcc_lo
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v5.l, s0
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v5.h, s1
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x7e00, v4.h, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v2.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v1.h, v3.h, s1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, v3.l, s2
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v1.h, v1.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, v2.h, v0.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, v2.l, v0.l, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, v3.h, v1.h, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, v3.l, v1.l, s2
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_maximum_v4f16__nsz:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
-; GFX11-FAKE16-NEXT:    v_pk_max_f16 v7, v0, v2
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v8
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v9, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
-; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v5, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, v7, v6, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v4, v2, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v6, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v7, v3, 0x5040100
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v4f16__nsz:
@@ -1975,140 +2350,189 @@ define <8 x half> @v_maximum_v8f16(<8 x half> %src0, <8 x half> %src1) {
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v9, v9
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v10, v10
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v11, v11
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v12, v12
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v8, v8
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v13, v13
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v10, v10
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v9, v9
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v14, v14
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v11, v11
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v10, v10
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v15, v15
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v12, v12
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v11, v11
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v9, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v13, v13
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v12, v12
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT:    v_max_f32_e32 v16, v0, v8
-; GFX7-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v8
+; GFX7-NEXT:    v_max_f32_e32 v1, v1, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v10, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v14, v14
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v13, v13
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX7-NEXT:    v_max_f32_e32 v8, v1, v9
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v9
+; GFX7-NEXT:    v_max_f32_e32 v2, v2, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v11, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v15, v15
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v14, v14
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v6, v6
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v17, v8, vcc
-; GFX7-NEXT:    v_max_f32_e32 v8, v2, v10
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v10
+; GFX7-NEXT:    v_max_f32_e32 v3, v3, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v12, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
+; GFX7-NEXT:    v_cndmask_b32_e32 v5, v5, v13, vcc
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v15, v15
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT:    v_cndmask_b32_e32 v2, v17, v8, vcc
-; GFX7-NEXT:    v_max_f32_e32 v8, v3, v11
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v3, v11
-; GFX7-NEXT:    v_cndmask_b32_e32 v3, v17, v8, vcc
-; GFX7-NEXT:    v_max_f32_e32 v8, v4, v12
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v4, v12
-; GFX7-NEXT:    v_cndmask_b32_e32 v4, v17, v8, vcc
-; GFX7-NEXT:    v_max_f32_e32 v8, v5, v13
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v5, v13
-; GFX7-NEXT:    v_cndmask_b32_e32 v5, v17, v8, vcc
-; GFX7-NEXT:    v_max_f32_e32 v8, v6, v14
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v6, v14
-; GFX7-NEXT:    v_cndmask_b32_e32 v6, v17, v8, vcc
-; GFX7-NEXT:    v_max_f32_e32 v8, v7, v15
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v7, v15
-; GFX7-NEXT:    v_cndmask_b32_e32 v7, v17, v8, vcc
+; GFX7-NEXT:    v_max_f32_e32 v4, v4, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v13, v5, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
+; GFX7-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX7-NEXT:    v_max_f32_e32 v5, v5, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
+; GFX7-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX7-NEXT:    v_max_f32_e32 v6, v6, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v15, v7, vcc
+; GFX7-NEXT:    v_max_f32_e32 v7, v7, v8
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v8f16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 16, v7
-; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 16, v3
-; GFX8-NEXT:    v_max_f16_e32 v10, v9, v8
-; GFX8-NEXT:    v_mov_b32_e32 v11, 0x7e00
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v9, v8
-; GFX8-NEXT:    v_cndmask_b32_e32 v8, v11, v10, vcc
-; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 16, v6
-; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
-; GFX8-NEXT:    v_max_f16_e32 v12, v10, v9
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v10, v9
-; GFX8-NEXT:    v_cndmask_b32_e32 v9, v11, v12, vcc
-; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 16, v5
-; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 16, v1
-; GFX8-NEXT:    v_max_f16_e32 v13, v12, v10
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v12, v10
-; GFX8-NEXT:    v_cndmask_b32_e32 v10, v11, v13, vcc
+; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
+; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 16, v3
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX8-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc
+; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 16, v6
+; GFX8-NEXT:    v_max_f16_sdwa v8, v8, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 16, v2
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v10, v10
+; GFX8-NEXT:    v_cndmask_b32_e32 v9, v9, v10, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
+; GFX8-NEXT:    v_cndmask_b32_e32 v10, v10, v9, vcc
+; GFX8-NEXT:    v_lshrrev_b32_e32 v11, 16, v5
+; GFX8-NEXT:    v_max_f16_sdwa v9, v9, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 16, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v11, v11
+; GFX8-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v10, v10
+; GFX8-NEXT:    v_cndmask_b32_e32 v11, v11, v10, vcc
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 16, v4
-; GFX8-NEXT:    v_lshrrev_b32_e32 v13, 16, v0
-; GFX8-NEXT:    v_max_f16_e32 v14, v13, v12
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v13, v12
-; GFX8-NEXT:    v_cndmask_b32_e32 v12, v11, v14, vcc
-; GFX8-NEXT:    v_max_f16_e32 v13, v3, v7
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v3, v7
-; GFX8-NEXT:    v_cndmask_b32_e32 v3, v11, v13, vcc
-; GFX8-NEXT:    v_max_f16_e32 v7, v2, v6
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v2, v6
-; GFX8-NEXT:    v_cndmask_b32_e32 v2, v11, v7, vcc
-; GFX8-NEXT:    v_max_f16_e32 v6, v1, v5
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v1, v5
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v11, v6, vcc
-; GFX8-NEXT:    v_max_f16_e32 v5, v0, v4
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v4
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v11, v5, vcc
-; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 16, v12
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 16, v10
-; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 16, v9
-; GFX8-NEXT:    v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 16, v8
-; GFX8-NEXT:    v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_max_f16_sdwa v10, v10, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_lshrrev_b32_e32 v11, 16, v0
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v12, v12
+; GFX8-NEXT:    v_cndmask_b32_e32 v11, v11, v12, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v11, v11
+; GFX8-NEXT:    v_cndmask_b32_e32 v12, v12, v11, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX8-NEXT:    v_max_f16_sdwa v11, v11, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_max_f16_e32 v3, v3, v7
+; GFX8-NEXT:    v_max_f16_e32 v2, v2, v6
+; GFX8-NEXT:    v_max_f16_e32 v1, v1, v5
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v4
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v11
+; GFX8-NEXT:    v_or_b32_e32 v1, v1, v10
+; GFX8-NEXT:    v_or_b32_e32 v2, v2, v9
+; GFX8-NEXT:    v_or_b32_e32 v3, v3, v8
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v8f16:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_pk_max_f16 v8, v3, v7
-; GFX900-NEXT:    v_mov_b32_e32 v9, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v3, v7
-; GFX900-NEXT:    v_cndmask_b32_e32 v10, v9, v8, vcc
-; GFX900-NEXT:    v_lshrrev_b32_e32 v8, 16, v8
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_e32 v3, v9, v8, vcc
-; GFX900-NEXT:    v_pk_max_f16 v7, v2, v6
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v2, v6
-; GFX900-NEXT:    v_cndmask_b32_e32 v8, v9, v7, vcc
-; GFX900-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_e32 v2, v9, v7, vcc
-; GFX900-NEXT:    v_pk_max_f16 v6, v1, v5
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v1, v5
-; GFX900-NEXT:    v_cndmask_b32_e32 v7, v9, v6, vcc
-; GFX900-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v9, v6, vcc
-; GFX900-NEXT:    v_pk_max_f16 v5, v0, v4
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v4
-; GFX900-NEXT:    v_cndmask_b32_e32 v6, v9, v5, vcc
-; GFX900-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v9, v5, vcc
+; GFX900-NEXT:    v_lshrrev_b32_e32 v9, 16, v4
+; GFX900-NEXT:    v_lshrrev_b32_e32 v8, 16, v0
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
 ; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX900-NEXT:    v_perm_b32 v0, v0, v6, s4
-; GFX900-NEXT:    v_perm_b32 v1, v1, v7, s4
-; GFX900-NEXT:    v_perm_b32 v2, v2, v8, s4
-; GFX900-NEXT:    v_perm_b32 v3, v3, v10, s4
+; GFX900-NEXT:    v_perm_b32 v4, v9, v4, s4
+; GFX900-NEXT:    v_perm_b32 v0, v8, v0, s4
+; GFX900-NEXT:    v_lshrrev_b32_e32 v8, 16, v5
+; GFX900-NEXT:    v_pk_max_f16 v0, v0, v4
+; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
+; GFX900-NEXT:    v_perm_b32 v5, v8, v5, s4
+; GFX900-NEXT:    v_perm_b32 v1, v4, v1, s4
+; GFX900-NEXT:    v_pk_max_f16 v1, v1, v5
+; GFX900-NEXT:    v_lshrrev_b32_e32 v5, 16, v6
+; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v6, v6, v2, vcc
+; GFX900-NEXT:    v_perm_b32 v5, v5, v6, s4
+; GFX900-NEXT:    v_perm_b32 v2, v4, v2, s4
+; GFX900-NEXT:    v_pk_max_f16 v2, v2, v5
+; GFX900-NEXT:    v_lshrrev_b32_e32 v5, 16, v7
+; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v6, v7, v3, vcc
+; GFX900-NEXT:    v_perm_b32 v5, v5, v6, s4
+; GFX900-NEXT:    v_perm_b32 v3, v4, v3, s4
+; GFX900-NEXT:    v_pk_max_f16 v3, v3, v5
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v8f16:
@@ -2123,102 +2547,170 @@ define <8 x half> @v_maximum_v8f16(<8 x half> %src0, <8 x half> %src1) {
 ; GFX10-LABEL: v_maximum_v8f16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_pk_max_f16 v8, v3, v7
-; GFX10-NEXT:    v_mov_b32_e32 v9, 0x7e00
-; GFX10-NEXT:    v_pk_max_f16 v10, v2, v6
-; GFX10-NEXT:    v_cmp_o_f16_sdwa s4, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_cmp_o_f16_sdwa s5, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_pk_max_f16 v12, v0, v4
-; GFX10-NEXT:    v_cndmask_b32_sdwa v11, v9, v8, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v6
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v10, vcc_lo
-; GFX10-NEXT:    s_mov_b32 vcc_lo, s4
-; GFX10-NEXT:    v_cmp_o_f16_sdwa s4, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_sdwa v6, v9, v10, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_pk_max_f16 v10, v1, v5
-; GFX10-NEXT:    s_mov_b32 vcc_lo, s5
-; GFX10-NEXT:    v_perm_b32 v2, v6, v2, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_sdwa v13, v9, v10, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v4
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v12, vcc_lo
-; GFX10-NEXT:    s_mov_b32 vcc_lo, s4
-; GFX10-NEXT:    v_cndmask_b32_sdwa v4, v9, v12, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v5
-; GFX10-NEXT:    v_perm_b32 v0, v4, v0, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v10, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
-; GFX10-NEXT:    v_perm_b32 v1, v13, v1, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v8, vcc_lo
-; GFX10-NEXT:    v_perm_b32 v3, v11, v3, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
+; GFX10-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
+; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 16, v5
+; GFX10-NEXT:    v_lshrrev_b32_e32 v11, 16, v1
+; GFX10-NEXT:    v_lshrrev_b32_e32 v13, 16, v7
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8, v8
+; GFX10-NEXT:    v_lshrrev_b32_e32 v12, 16, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX10-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v10, v10
+; GFX10-NEXT:    v_perm_b32 v0, v9, v0, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v9, 16, v2
+; GFX10-NEXT:    v_perm_b32 v4, v8, v4, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v8, v11, v10, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_lshrrev_b32_e32 v11, 16, v6
+; GFX10-NEXT:    v_pk_max_f16 v0, v0, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8, v8
+; GFX10-NEXT:    v_cndmask_b32_e32 v10, v10, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v11, v11
+; GFX10-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX10-NEXT:    v_perm_b32 v1, v8, v1, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v5, v10, v5, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v11, v11, v9, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_pk_max_f16 v1, v1, v5
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v13, v13
+; GFX10-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v12, v12
+; GFX10-NEXT:    v_perm_b32 v2, v9, v2, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v6, v11, v6, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v13, v13, v12, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_pk_max_f16 v2, v2, v6
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v3, vcc_lo
+; GFX10-NEXT:    v_perm_b32 v3, v12, v3, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v7, v13, v7, 0x5040100
+; GFX10-NEXT:    v_pk_max_f16 v3, v3, v7
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: v_maximum_v8f16:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3.l, v7.l
-; GFX11-TRUE16-NEXT:    v_pk_max_f16 v8, v3, v7
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v2.l, v6.l
-; GFX11-TRUE16-NEXT:    v_pk_max_f16 v9, v2, v6
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v0.l, v4.l
-; GFX11-TRUE16-NEXT:    v_pk_max_f16 v10, v0, v4
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, 0x7e00, v8.l, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3.h, v7.h
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, 0x7e00, v9.l, s0
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v1.l, v5.l
-; GFX11-TRUE16-NEXT:    v_pk_max_f16 v7, v1, v5
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, v0.h, v4.h
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s3, v1.h, v5.h
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s4, v2.h, v6.h
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v10.l, s1
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v7.l, s0
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v10.h, s2
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x7e00, v7.h, s3
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, 0x7e00, v9.h, s4
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, 0x7e00, v8.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4.h, v4.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v4.l, v4.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v5.h, v5.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v5.l, v5.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s3, v7.l, v7.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v4.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v4.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v1.h, v5.h, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, v5.l, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, v3.l, v7.l, s3
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v1.h, v1.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s3, v3.l, v3.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.h, v4.h, v0.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.l, v4.l, v0.l, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.h, v5.h, v1.h, s1
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6.h, v6.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v6.l, v6.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v7.h, v7.h
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v7.l, v7.l, v3.l, s3
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, v2.h, v6.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, v2.l, v6.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, v3.h, v7.h, s1
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v2.h, v2.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v2.l, v2.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v3.h, v3.h
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.l, v5.l, v1.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v6.h, v6.h, v2.h, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v6.l, v6.l, v2.l, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v7.h, v7.h, v3.h, s2
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v1, v1, v5
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v2, v2, v6
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v3, v3, v7
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_maximum_v8f16:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_pk_max_f16 v8, v3, v7
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
-; GFX11-FAKE16-NEXT:    v_pk_max_f16 v10, v2, v6
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v5
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v13, 16, v7
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8, v8
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v12, 16, v3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v10, v10
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v4, v8, v4, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v8, v11, v10, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v6
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v12, 16, v2
-; GFX11-FAKE16-NEXT:    v_pk_max_f16 v14, v1, v5
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v9, 0x7e00, v8, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v6
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v13, 16, v10
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v8
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v10, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v12, v11
-; GFX11-FAKE16-NEXT:    v_pk_max_f16 v11, v0, v4
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v12, 16, v4
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v13, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v5
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v13, 16, v0
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v15, 16, v11
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v10, 0x7e00, v14, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v4
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v14, 16, v14
-; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v6, v2, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v11, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v13, v12
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v15, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v5
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v4, v0, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v14, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
-; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v1, v10, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v8, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8, v8
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v9, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v10, v10, v8, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v11, v11
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v8, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v11, v11, v9, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX11-FAKE16-NEXT:    v_perm_b32 v5, v10, v5, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v13, v13
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v1, v1, v5
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, v6, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v12, v12
+; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v9, v2, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v13, v13, v12, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX11-FAKE16-NEXT:    v_perm_b32 v6, v11, v6, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, v7, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v12, v3, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v2, v2, v6
+; GFX11-FAKE16-NEXT:    v_perm_b32 v7, v13, v7, 0x5040100
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v3, v9, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v3, v3, v7
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v8f16:
@@ -2249,262 +2741,363 @@ define <16 x half> @v_maximum_v16f16(<16 x half> %src0, <16 x half> %src1) {
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v16
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v16, v16
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v16, v16, v0, vcc
 ; GFX7-NEXT:    v_max_f32_e32 v0, v0, v16
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v16, v17
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v17, v20
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v16, v16
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[4:5], v1, v16
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v16, v16
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v16, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v16, v16, v1, vcc
 ; GFX7-NEXT:    v_max_f32_e32 v1, v1, v16
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v16, v18
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v18, v22
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v16, v16
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v8, v8
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v16, v16
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v9, v9
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v8, v8
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[6:7], v2, v16
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v16, v16
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v16, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v16, v16, v2, vcc
 ; GFX7-NEXT:    v_max_f32_e32 v2, v2, v16
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v16, v19
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v9, v9
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v10, v10
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v17, v27
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v7, v7
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v8, v8
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v9, v9
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v16, v16
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v10, v10
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v11, v11
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v10, v10
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v17, v17
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[8:9], v3, v16
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v9, v9
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v16, v16
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v16, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v16, v16, v3, vcc
 ; GFX7-NEXT:    v_max_f32_e32 v3, v3, v16
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v16, v20
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v11, v11
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v16, v17
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v17, v21
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v10, v10
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v12, v12
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v19, v29
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v16, v16
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[24:25], v11, v17
-; GFX7-NEXT:    v_max_f32_e32 v11, v11, v17
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v17, v28
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[10:11], v4, v16
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v16, v16
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v4, v16, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v16, v16, v4, vcc
 ; GFX7-NEXT:    v_max_f32_e32 v4, v4, v16
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v16, v21
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v20, v13
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v13, v17
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v18, v12
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v16, v16
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v12, v19
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v17, v20
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[26:27], v18, v13
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[12:13], v5, v16
-; GFX7-NEXT:    v_max_f32_e32 v5, v5, v16
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v16, v22
-; GFX7-NEXT:    v_max_f32_e32 v13, v18, v13
-; GFX7-NEXT:    v_max_f32_e32 v18, v17, v12
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[28:29], v17, v12
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v16, v16
+; GFX7-NEXT:    buffer_load_dword v16, off, s[0:3], s32
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v17, v17
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v11, v11
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v12, v12
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v13, v13
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v17, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v5, v5, v17, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v17, v5, vcc
+; GFX7-NEXT:    v_max_f32_e32 v5, v5, v17
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v17, v18
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v18, v23
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v14, v14
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v13, v13
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v17, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v6, v6, v17, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v17, v6, vcc
+; GFX7-NEXT:    v_max_f32_e32 v6, v6, v17
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v17, v24
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v18, v18
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v15, v15
-; GFX7-NEXT:    v_mov_b32_e32 v19, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[14:15], v6, v16
-; GFX7-NEXT:    v_max_f32_e32 v6, v6, v16
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v16, v23
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v14, v14
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v17, v17
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v18, v18
+; GFX7-NEXT:    v_cndmask_b32_e32 v7, v7, v18, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX7-NEXT:    v_cndmask_b32_e32 v18, v18, v7, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v17, v17
+; GFX7-NEXT:    v_max_f32_e32 v7, v7, v18
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v18, v25
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v8, v17, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v17, v8, vcc
+; GFX7-NEXT:    v_max_f32_e32 v8, v8, v17
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v17, v26
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v18, v18
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v15, v15
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v19, v0, vcc
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v16, v16
-; GFX7-NEXT:    v_cndmask_b32_e64 v1, v19, v1, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v19, v2, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v19, v3, s[8:9]
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[16:17], v7, v16
-; GFX7-NEXT:    v_max_f32_e32 v7, v7, v16
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v16, v24
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v19, v4, s[10:11]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v19, v5, s[12:13]
-; GFX7-NEXT:    v_cndmask_b32_e64 v6, v19, v6, s[14:15]
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v16, v16
-; GFX7-NEXT:    v_cndmask_b32_e64 v7, v19, v7, s[16:17]
-; GFX7-NEXT:    v_cndmask_b32_e64 v11, v19, v11, s[24:25]
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[18:19], v8, v16
-; GFX7-NEXT:    v_max_f32_e32 v8, v8, v16
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v16, v25
-; GFX7-NEXT:    v_cndmask_b32_e64 v8, v19, v8, s[18:19]
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v16, v16
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[20:21], v9, v16
-; GFX7-NEXT:    v_max_f32_e32 v9, v9, v16
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v16, v26
-; GFX7-NEXT:    v_cndmask_b32_e64 v9, v19, v9, s[20:21]
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v16, v16
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[22:23], v10, v16
-; GFX7-NEXT:    v_max_f32_e32 v10, v10, v16
-; GFX7-NEXT:    buffer_load_dword v16, off, s[0:3], s32
-; GFX7-NEXT:    v_cndmask_b32_e64 v10, v19, v10, s[22:23]
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v17, v17
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v18, v18
+; GFX7-NEXT:    v_cndmask_b32_e32 v9, v9, v18, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
+; GFX7-NEXT:    v_cndmask_b32_e32 v18, v18, v9, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v17, v17
+; GFX7-NEXT:    v_max_f32_e32 v9, v9, v18
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v18, v27
+; GFX7-NEXT:    v_cndmask_b32_e32 v10, v10, v17, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v17, v10, vcc
+; GFX7-NEXT:    v_max_f32_e32 v10, v10, v17
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v17, v28
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v18, v18
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v17, v17
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v18, v18
+; GFX7-NEXT:    v_cndmask_b32_e32 v11, v11, v18, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
+; GFX7-NEXT:    v_cndmask_b32_e32 v18, v18, v11, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v17, v17
+; GFX7-NEXT:    v_max_f32_e32 v11, v11, v18
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v18, v29
+; GFX7-NEXT:    v_cndmask_b32_e32 v12, v12, v17, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v17, v12, vcc
+; GFX7-NEXT:    v_max_f32_e32 v12, v12, v17
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v17, v30
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v18, v18
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v17, v17
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v18, v18
+; GFX7-NEXT:    v_cndmask_b32_e32 v13, v13, v18, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
+; GFX7-NEXT:    v_cndmask_b32_e32 v18, v18, v13, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v17, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v14, v14, v17, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v17, v14, vcc
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v12, v16
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v16, v30
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v17, v12
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v16, v16
+; GFX7-NEXT:    v_max_f32_e32 v13, v13, v18
+; GFX7-NEXT:    v_max_f32_e32 v14, v14, v17
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v16, v16
-; GFX7-NEXT:    v_cndmask_b32_e64 v12, v19, v13, s[26:27]
-; GFX7-NEXT:    v_cndmask_b32_e64 v13, v19, v18, s[28:29]
-; GFX7-NEXT:    v_max_f32_e32 v18, v14, v16
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v14, v16
-; GFX7-NEXT:    v_cndmask_b32_e32 v14, v19, v18, vcc
-; GFX7-NEXT:    v_max_f32_e32 v16, v15, v17
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v15, v17
-; GFX7-NEXT:    v_cndmask_b32_e32 v15, v19, v16, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v16, v16
+; GFX7-NEXT:    v_cndmask_b32_e32 v15, v15, v16, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
+; GFX7-NEXT:    v_cndmask_b32_e32 v16, v16, v15, vcc
+; GFX7-NEXT:    v_max_f32_e32 v15, v15, v16
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v16f16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_lshrrev_b32_e32 v16, 16, v15
+; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v7
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v16, v16
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v17, v16, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v17, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v16, v16, v17, vcc
+; GFX8-NEXT:    v_max_f16_sdwa v16, v17, v16 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v14
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v6
-; GFX8-NEXT:    v_max_f16_e32 v16, v18, v17
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v18, v17
-; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v13
-; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v5
-; GFX8-NEXT:    v_max_f16_e32 v20, v18, v17
-; GFX8-NEXT:    v_cmp_o_f16_e64 s[4:5], v18, v17
-; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v12
-; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v4
-; GFX8-NEXT:    v_max_f16_e32 v21, v18, v17
-; GFX8-NEXT:    v_cmp_o_f16_e64 s[6:7], v18, v17
-; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v11
-; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v3
-; GFX8-NEXT:    v_max_f16_e32 v22, v18, v17
-; GFX8-NEXT:    v_cmp_o_f16_e64 s[8:9], v18, v17
-; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v10
-; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v2
-; GFX8-NEXT:    v_max_f16_e32 v23, v18, v17
-; GFX8-NEXT:    v_cmp_o_f16_e64 s[10:11], v18, v17
-; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v9
-; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v1
-; GFX8-NEXT:    v_mov_b32_e32 v19, 0x7e00
-; GFX8-NEXT:    v_max_f16_e32 v24, v18, v17
-; GFX8-NEXT:    v_cmp_o_f16_e64 s[12:13], v18, v17
-; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v8
-; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v0
-; GFX8-NEXT:    v_max_f16_e32 v25, v18, v17
-; GFX8-NEXT:    v_cmp_o_f16_e64 s[14:15], v18, v17
-; GFX8-NEXT:    v_max_f16_e32 v17, v6, v14
-; GFX8-NEXT:    v_cmp_o_f16_e64 s[16:17], v6, v14
-; GFX8-NEXT:    v_max_f16_e32 v6, v5, v13
-; GFX8-NEXT:    v_cmp_o_f16_e64 s[18:19], v5, v13
-; GFX8-NEXT:    v_max_f16_e32 v5, v4, v12
-; GFX8-NEXT:    v_cmp_o_f16_e64 s[20:21], v4, v12
-; GFX8-NEXT:    v_max_f16_e32 v4, v3, v11
-; GFX8-NEXT:    v_cmp_o_f16_e64 s[22:23], v3, v11
-; GFX8-NEXT:    v_max_f16_e32 v3, v2, v10
-; GFX8-NEXT:    v_max_f16_e32 v11, v7, v15
-; GFX8-NEXT:    v_cmp_o_f16_e64 s[24:25], v7, v15
-; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 16, v15
-; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
-; GFX8-NEXT:    v_cndmask_b32_e32 v14, v19, v16, vcc
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v2, v10
-; GFX8-NEXT:    v_max_f16_e32 v13, v7, v12
-; GFX8-NEXT:    v_cmp_o_f16_e64 s[26:27], v7, v12
-; GFX8-NEXT:    v_max_f16_e32 v7, v1, v9
-; GFX8-NEXT:    v_cndmask_b32_e32 v2, v19, v3, vcc
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v1, v9
-; GFX8-NEXT:    v_max_f16_e32 v12, v0, v8
-; GFX8-NEXT:    v_cndmask_b32_e64 v18, v19, v22, s[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e64 v22, v19, v25, s[14:15]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v19, v7, vcc
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v8
-; GFX8-NEXT:    v_cndmask_b32_e64 v16, v19, v21, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v21, v19, v24, s[12:13]
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v19, v12, vcc
-; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v22
-; GFX8-NEXT:    v_cndmask_b32_e64 v15, v19, v20, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v20, v19, v23, s[10:11]
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v21
-; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v20
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v19, v4, s[22:23]
-; GFX8-NEXT:    v_or_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v18
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v19, v5, s[20:21]
-; GFX8-NEXT:    v_or_b32_sdwa v3, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 16, v16
-; GFX8-NEXT:    v_cndmask_b32_e64 v13, v19, v13, s[26:27]
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, v19, v6, s[18:19]
-; GFX8-NEXT:    v_or_b32_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_lshlrev_b32_e32 v5, 16, v15
-; GFX8-NEXT:    v_cndmask_b32_e64 v11, v19, v11, s[24:25]
-; GFX8-NEXT:    v_cndmask_b32_e64 v17, v19, v17, s[16:17]
-; GFX8-NEXT:    v_or_b32_sdwa v5, v6, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_lshlrev_b32_e32 v6, 16, v14
-; GFX8-NEXT:    v_lshlrev_b32_e32 v7, 16, v13
-; GFX8-NEXT:    v_or_b32_sdwa v6, v17, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_or_b32_sdwa v7, v11, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v17, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v18, v18, v17, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v18, v18
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v17, v18, vcc
+; GFX8-NEXT:    v_max_f16_sdwa v17, v18, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v13
+; GFX8-NEXT:    v_lshrrev_b32_e32 v19, 16, v5
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v18, v18
+; GFX8-NEXT:    v_cndmask_b32_e32 v19, v19, v18, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v19, v19
+; GFX8-NEXT:    v_cndmask_b32_e32 v18, v18, v19, vcc
+; GFX8-NEXT:    v_max_f16_sdwa v18, v19, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_lshrrev_b32_e32 v19, 16, v12
+; GFX8-NEXT:    v_lshrrev_b32_e32 v20, 16, v4
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v19, v19
+; GFX8-NEXT:    v_cndmask_b32_e32 v20, v20, v19, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v20, v20
+; GFX8-NEXT:    v_cndmask_b32_e32 v19, v19, v20, vcc
+; GFX8-NEXT:    v_max_f16_sdwa v19, v20, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_lshrrev_b32_e32 v20, 16, v11
+; GFX8-NEXT:    v_lshrrev_b32_e32 v21, 16, v3
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v20, v20
+; GFX8-NEXT:    v_cndmask_b32_e32 v21, v21, v20, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v21, v21
+; GFX8-NEXT:    v_cndmask_b32_e32 v20, v20, v21, vcc
+; GFX8-NEXT:    v_max_f16_sdwa v20, v21, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_lshrrev_b32_e32 v21, 16, v10
+; GFX8-NEXT:    v_lshrrev_b32_e32 v22, 16, v2
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v21, v21
+; GFX8-NEXT:    v_cndmask_b32_e32 v22, v22, v21, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v22, v22
+; GFX8-NEXT:    v_cndmask_b32_e32 v21, v21, v22, vcc
+; GFX8-NEXT:    v_max_f16_sdwa v21, v22, v21 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_lshrrev_b32_e32 v22, 16, v9
+; GFX8-NEXT:    v_lshrrev_b32_e32 v23, 16, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v22, v22
+; GFX8-NEXT:    v_cndmask_b32_e32 v23, v23, v22, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v23, v23
+; GFX8-NEXT:    v_cndmask_b32_e32 v22, v22, v23, vcc
+; GFX8-NEXT:    v_max_f16_sdwa v22, v23, v22 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_lshrrev_b32_e32 v23, 16, v8
+; GFX8-NEXT:    v_lshrrev_b32_e32 v24, 16, v0
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v23, v23
+; GFX8-NEXT:    v_cndmask_b32_e32 v24, v24, v23, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v24, v24
+; GFX8-NEXT:    v_cndmask_b32_e32 v23, v23, v24, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v15, v15
+; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX8-NEXT:    v_cndmask_b32_e32 v15, v15, v7, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v14, v14
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX8-NEXT:    v_cndmask_b32_e32 v14, v14, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v13, v13
+; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v13, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v13, v13, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v12, v12
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v12, v12, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v11, v11
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v11, v11, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v10, v10
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v10, v10, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX8-NEXT:    v_max_f16_sdwa v23, v24, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_max_f16_e32 v7, v7, v15
+; GFX8-NEXT:    v_max_f16_e32 v6, v6, v14
+; GFX8-NEXT:    v_max_f16_e32 v5, v5, v13
+; GFX8-NEXT:    v_max_f16_e32 v4, v4, v12
+; GFX8-NEXT:    v_max_f16_e32 v3, v3, v11
+; GFX8-NEXT:    v_max_f16_e32 v2, v2, v10
+; GFX8-NEXT:    v_max_f16_e32 v1, v1, v9
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v8
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v23
+; GFX8-NEXT:    v_or_b32_e32 v1, v1, v22
+; GFX8-NEXT:    v_or_b32_e32 v2, v2, v21
+; GFX8-NEXT:    v_or_b32_e32 v3, v3, v20
+; GFX8-NEXT:    v_or_b32_e32 v4, v4, v19
+; GFX8-NEXT:    v_or_b32_e32 v5, v5, v18
+; GFX8-NEXT:    v_or_b32_e32 v6, v6, v17
+; GFX8-NEXT:    v_or_b32_e32 v7, v7, v16
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v16f16:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_pk_max_f16 v16, v7, v15
-; GFX900-NEXT:    v_mov_b32_e32 v17, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v7, v15
-; GFX900-NEXT:    v_cndmask_b32_e32 v18, v17, v16, vcc
-; GFX900-NEXT:    v_lshrrev_b32_e32 v16, 16, v16
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_e32 v7, v17, v16, vcc
-; GFX900-NEXT:    v_pk_max_f16 v15, v6, v14
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v6, v14
-; GFX900-NEXT:    v_cndmask_b32_e32 v16, v17, v15, vcc
-; GFX900-NEXT:    v_lshrrev_b32_e32 v15, 16, v15
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_e32 v6, v17, v15, vcc
-; GFX900-NEXT:    v_pk_max_f16 v14, v5, v13
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v5, v13
-; GFX900-NEXT:    v_cndmask_b32_e32 v15, v17, v14, vcc
-; GFX900-NEXT:    v_lshrrev_b32_e32 v14, 16, v14
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_e32 v5, v17, v14, vcc
-; GFX900-NEXT:    v_pk_max_f16 v13, v4, v12
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v4, v12
-; GFX900-NEXT:    v_cndmask_b32_e32 v14, v17, v13, vcc
-; GFX900-NEXT:    v_lshrrev_b32_e32 v13, 16, v13
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_e32 v4, v17, v13, vcc
-; GFX900-NEXT:    v_pk_max_f16 v12, v3, v11
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v3, v11
-; GFX900-NEXT:    v_cndmask_b32_e32 v13, v17, v12, vcc
-; GFX900-NEXT:    v_lshrrev_b32_e32 v12, 16, v12
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_e32 v3, v17, v12, vcc
-; GFX900-NEXT:    v_pk_max_f16 v11, v2, v10
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v2, v10
-; GFX900-NEXT:    v_cndmask_b32_e32 v12, v17, v11, vcc
-; GFX900-NEXT:    v_lshrrev_b32_e32 v11, 16, v11
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_e32 v2, v17, v11, vcc
-; GFX900-NEXT:    v_pk_max_f16 v10, v1, v9
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v1, v9
-; GFX900-NEXT:    v_cndmask_b32_e32 v11, v17, v10, vcc
-; GFX900-NEXT:    v_lshrrev_b32_e32 v10, 16, v10
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v17, v10, vcc
-; GFX900-NEXT:    v_pk_max_f16 v9, v0, v8
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v8
-; GFX900-NEXT:    v_cndmask_b32_e32 v10, v17, v9, vcc
-; GFX900-NEXT:    v_lshrrev_b32_e32 v9, 16, v9
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v17, v9, vcc
+; GFX900-NEXT:    v_lshrrev_b32_e32 v17, 16, v8
+; GFX900-NEXT:    v_lshrrev_b32_e32 v16, 16, v0
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v17, v17
+; GFX900-NEXT:    v_cndmask_b32_e32 v16, v16, v17, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v16, v16
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v17, v16, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
 ; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX900-NEXT:    v_perm_b32 v0, v0, v10, s4
-; GFX900-NEXT:    v_perm_b32 v1, v1, v11, s4
-; GFX900-NEXT:    v_perm_b32 v2, v2, v12, s4
-; GFX900-NEXT:    v_perm_b32 v3, v3, v13, s4
-; GFX900-NEXT:    v_perm_b32 v4, v4, v14, s4
-; GFX900-NEXT:    v_perm_b32 v5, v5, v15, s4
-; GFX900-NEXT:    v_perm_b32 v6, v6, v16, s4
-; GFX900-NEXT:    v_perm_b32 v7, v7, v18, s4
+; GFX900-NEXT:    v_perm_b32 v8, v17, v8, s4
+; GFX900-NEXT:    v_perm_b32 v0, v16, v0, s4
+; GFX900-NEXT:    v_lshrrev_b32_e32 v16, 16, v9
+; GFX900-NEXT:    v_pk_max_f16 v0, v0, v8
+; GFX900-NEXT:    v_lshrrev_b32_e32 v8, 16, v1
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v16, v16
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v16, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v16, v16, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc
+; GFX900-NEXT:    v_perm_b32 v9, v16, v9, s4
+; GFX900-NEXT:    v_perm_b32 v1, v8, v1, s4
+; GFX900-NEXT:    v_pk_max_f16 v1, v1, v9
+; GFX900-NEXT:    v_lshrrev_b32_e32 v9, 16, v10
+; GFX900-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v10, v10
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v10, v10, v2, vcc
+; GFX900-NEXT:    v_perm_b32 v9, v9, v10, s4
+; GFX900-NEXT:    v_perm_b32 v2, v8, v2, s4
+; GFX900-NEXT:    v_pk_max_f16 v2, v2, v9
+; GFX900-NEXT:    v_lshrrev_b32_e32 v9, 16, v11
+; GFX900-NEXT:    v_lshrrev_b32_e32 v8, 16, v3
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v11, v11
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v10, v11, v3, vcc
+; GFX900-NEXT:    v_perm_b32 v9, v9, v10, s4
+; GFX900-NEXT:    v_perm_b32 v3, v8, v3, s4
+; GFX900-NEXT:    v_pk_max_f16 v3, v3, v9
+; GFX900-NEXT:    v_lshrrev_b32_e32 v9, 16, v12
+; GFX900-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v12, v12
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v10, v12, v4, vcc
+; GFX900-NEXT:    v_perm_b32 v9, v9, v10, s4
+; GFX900-NEXT:    v_perm_b32 v4, v8, v4, s4
+; GFX900-NEXT:    v_pk_max_f16 v4, v4, v9
+; GFX900-NEXT:    v_lshrrev_b32_e32 v9, 16, v13
+; GFX900-NEXT:    v_lshrrev_b32_e32 v8, 16, v5
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v13, v13
+; GFX900-NEXT:    v_cndmask_b32_e32 v5, v5, v13, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX900-NEXT:    v_cndmask_b32_e32 v10, v13, v5, vcc
+; GFX900-NEXT:    v_perm_b32 v9, v9, v10, s4
+; GFX900-NEXT:    v_perm_b32 v5, v8, v5, s4
+; GFX900-NEXT:    v_pk_max_f16 v5, v5, v9
+; GFX900-NEXT:    v_lshrrev_b32_e32 v9, 16, v14
+; GFX900-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v14, v14
+; GFX900-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX900-NEXT:    v_cndmask_b32_e32 v10, v14, v6, vcc
+; GFX900-NEXT:    v_perm_b32 v9, v9, v10, s4
+; GFX900-NEXT:    v_perm_b32 v6, v8, v6, s4
+; GFX900-NEXT:    v_pk_max_f16 v6, v6, v9
+; GFX900-NEXT:    v_lshrrev_b32_e32 v9, 16, v15
+; GFX900-NEXT:    v_lshrrev_b32_e32 v8, 16, v7
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v15, v15
+; GFX900-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX900-NEXT:    v_cndmask_b32_e32 v10, v15, v7, vcc
+; GFX900-NEXT:    v_perm_b32 v9, v9, v10, s4
+; GFX900-NEXT:    v_perm_b32 v7, v8, v7, s4
+; GFX900-NEXT:    v_pk_max_f16 v7, v7, v9
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v16f16:
@@ -2523,192 +3116,310 @@ define <16 x half> @v_maximum_v16f16(<16 x half> %src0, <16 x half> %src1) {
 ; GFX10-LABEL: v_maximum_v16f16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_pk_max_f16 v16, v7, v15
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v7, v15
-; GFX10-NEXT:    v_pk_max_f16 v18, v6, v14
-; GFX10-NEXT:    v_pk_max_f16 v19, v3, v11
-; GFX10-NEXT:    v_pk_max_f16 v20, v2, v10
-; GFX10-NEXT:    v_lshrrev_b32_e32 v17, 16, v16
-; GFX10-NEXT:    v_cndmask_b32_e32 v16, 0x7e00, v16, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_lshrrev_b32_e32 v15, 16, v18
-; GFX10-NEXT:    v_pk_max_f16 v21, v0, v8
-; GFX10-NEXT:    v_cndmask_b32_e32 v7, 0x7e00, v17, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v14
-; GFX10-NEXT:    v_pk_max_f16 v17, v5, v13
-; GFX10-NEXT:    v_lshrrev_b32_e32 v23, 16, v21
-; GFX10-NEXT:    v_perm_b32 v7, v7, v16, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v18, 0x7e00, v18, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_lshrrev_b32_e32 v14, 16, v17
-; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v15, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v5, v13
-; GFX10-NEXT:    v_perm_b32 v6, v6, v18, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v15, 0x7e00, v17, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_pk_max_f16 v17, v4, v12
-; GFX10-NEXT:    v_cndmask_b32_e32 v5, 0x7e00, v14, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v12
-; GFX10-NEXT:    v_lshrrev_b32_e32 v14, 16, v17
-; GFX10-NEXT:    v_perm_b32 v5, v5, v15, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v13, 0x7e00, v17, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v11
-; GFX10-NEXT:    v_lshrrev_b32_e32 v17, 16, v19
-; GFX10-NEXT:    v_cndmask_b32_e32 v19, 0x7e00, v19, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_pk_max_f16 v11, v1, v9
-; GFX10-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v17, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v10
-; GFX10-NEXT:    v_lshrrev_b32_e32 v22, 16, v11
-; GFX10-NEXT:    v_perm_b32 v3, v3, v19, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v17, 0x7e00, v20, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v9
-; GFX10-NEXT:    v_lshrrev_b32_e32 v20, 16, v20
-; GFX10-NEXT:    v_cndmask_b32_e32 v11, 0x7e00, v11, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v22, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v8
-; GFX10-NEXT:    v_perm_b32 v1, v1, v11, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v9, 0x7e00, v21, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v23, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_perm_b32 v0, v0, v9, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v20, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_perm_b32 v2, v2, v17, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v14, vcc_lo
-; GFX10-NEXT:    v_perm_b32 v4, v4, v13, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v16, 16, v8
+; GFX10-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
+; GFX10-NEXT:    v_lshrrev_b32_e32 v19, 16, v9
+; GFX10-NEXT:    v_lshrrev_b32_e32 v18, 16, v1
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v16, v16
+; GFX10-NEXT:    v_cndmask_b32_e32 v17, v17, v16, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8, v8
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v17, v17
+; GFX10-NEXT:    v_cndmask_b32_e32 v16, v16, v17, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v19, v19
+; GFX10-NEXT:    v_perm_b32 v0, v17, v0, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v17, 16, v10
+; GFX10-NEXT:    v_perm_b32 v8, v16, v8, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v18, v18, v19, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v16, 16, v2
+; GFX10-NEXT:    v_pk_max_f16 v0, v0, v8
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v18, v18
+; GFX10-NEXT:    v_cndmask_b32_e32 v19, v19, v18, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v17, v17
+; GFX10-NEXT:    v_cndmask_b32_e32 v16, v16, v17, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v10, v10
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v16, v16
+; GFX10-NEXT:    v_perm_b32 v1, v18, v1, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v18, 16, v11
+; GFX10-NEXT:    v_perm_b32 v9, v19, v9, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v17, v17, v16, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_lshrrev_b32_e32 v19, 16, v3
+; GFX10-NEXT:    v_pk_max_f16 v1, v1, v9
+; GFX10-NEXT:    v_lshrrev_b32_e32 v9, 16, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v10, v10, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v18, v18
+; GFX10-NEXT:    v_perm_b32 v2, v16, v2, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v10, v17, v10, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v8, v19, v18, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v17, 16, v5
+; GFX10-NEXT:    v_pk_max_f16 v2, v2, v10
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8, v8
+; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 16, v12
+; GFX10-NEXT:    v_cndmask_b32_e32 v16, v18, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v11, v11
+; GFX10-NEXT:    v_lshrrev_b32_e32 v18, 16, v15
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v10, v10
+; GFX10-NEXT:    v_cndmask_b32_e32 v9, v9, v10, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v12, v12
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v11, v11, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX10-NEXT:    v_perm_b32 v3, v8, v3, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v11, v16, v11, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v10, v10, v9, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_lshrrev_b32_e32 v16, 16, v13
+; GFX10-NEXT:    v_pk_max_f16 v3, v3, v11
+; GFX10-NEXT:    v_cndmask_b32_e32 v12, v12, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v16, v16
+; GFX10-NEXT:    v_perm_b32 v4, v9, v4, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v8, v10, v12, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v9, v17, v16, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v13, v13
+; GFX10-NEXT:    v_lshrrev_b32_e32 v12, 16, v14
+; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 16, v6
+; GFX10-NEXT:    v_lshrrev_b32_e32 v17, 16, v7
+; GFX10-NEXT:    v_pk_max_f16 v4, v4, v8
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v13, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX10-NEXT:    v_cndmask_b32_e32 v16, v16, v9, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v12, v12
+; GFX10-NEXT:    v_cndmask_b32_e32 v10, v10, v12, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_cndmask_b32_e32 v13, v13, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v10, v10
+; GFX10-NEXT:    v_perm_b32 v5, v9, v5, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v13, v16, v13, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v12, v12, v10, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v14, v14
+; GFX10-NEXT:    v_pk_max_f16 v5, v5, v13
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v18, v18
+; GFX10-NEXT:    v_cndmask_b32_e32 v17, v17, v18, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v15, v15
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_cndmask_b32_e32 v14, v14, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v17, v17
+; GFX10-NEXT:    v_perm_b32 v6, v10, v6, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v9, v12, v14, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v18, v18, v17, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_pk_max_f16 v6, v6, v9
+; GFX10-NEXT:    v_cndmask_b32_e32 v15, v15, v7, vcc_lo
+; GFX10-NEXT:    v_perm_b32 v7, v17, v7, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v10, v18, v15, 0x5040100
+; GFX10-NEXT:    v_pk_max_f16 v7, v7, v10
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: v_maximum_v16f16:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v7.l, v15.l
-; GFX11-TRUE16-NEXT:    v_pk_max_f16 v16, v7, v15
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v6.l, v14.l
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v6.h, v14.h
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, v5.l, v13.l
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s3, v5.h, v13.h
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v7.l, 0x7e00, v16.l, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v7.h, v15.h
-; GFX11-TRUE16-NEXT:    v_pk_max_f16 v15, v6, v14
-; GFX11-TRUE16-NEXT:    v_pk_max_f16 v14, v5, v13
-; GFX11-TRUE16-NEXT:    v_pk_max_f16 v13, v4, v12
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s4, v1.h, v9.h
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v7.h, 0x7e00, v16.h, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v6.l, 0x7e00, v15.l, s0
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v6.h, 0x7e00, v15.h, s1
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.l, 0x7e00, v14.l, s2
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.h, 0x7e00, v14.h, s3
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4.l, v12.l
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v3.l, v11.l
-; GFX11-TRUE16-NEXT:    v_pk_max_f16 v14, v3, v11
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v2.l, v10.l
-; GFX11-TRUE16-NEXT:    v_pk_max_f16 v15, v2, v10
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.l, 0x7e00, v13.l, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4.h, v12.h
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, 0x7e00, v14.l, s0
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v3.h, v11.h
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, 0x7e00, v15.l, s1
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v1.l, v9.l
-; GFX11-TRUE16-NEXT:    v_pk_max_f16 v11, v1, v9
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, v0.l, v8.l
-; GFX11-TRUE16-NEXT:    v_pk_max_f16 v12, v0, v8
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s3, v0.h, v8.h
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s5, v2.h, v10.h
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v11.l, s1
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x7e00, v11.h, s4
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v12.l, s2
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v12.h, s3
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, 0x7e00, v15.h, s5
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, 0x7e00, v14.h, s0
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.h, 0x7e00, v13.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8.h, v8.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v8.l, v8.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v9.l, v9.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v10.h, v10.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s3, v10.l, v10.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v8.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v8.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, v9.l, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, v2.h, v10.h, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, v2.l, v10.l, s3
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v2.h, v2.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s3, v2.l, v2.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v8.h, v8.h, v0.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9.h, v9.h
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v9.l, v9.l, v1.l, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v10.h, v10.h, v2.h, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v10.l, v10.l, v2.l, s3
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v12.h, v12.h
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v1.h, v9.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v12.l, v12.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s3, v13.h, v13.h
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.h, v4.h, v12.h, s1
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v1.h, v1.h
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v8.l, v8.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v11.h, v11.h
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.l, v4.l, v12.l, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.h, v5.h, v13.h, s3
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v9.h, v9.h, v1.h, s0
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v11.l, v11.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, v3.h, v11.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v4.h, v4.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v4.l, v4.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s3, v5.h, v5.h
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, v3.l, v11.l, s0
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3.h, v3.h
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v0, v0, v8
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v1, v1, v9
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v2, v2, v10
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v3.l, v3.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v8.h, v11.h, v3.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v9.h, v12.h, v4.h, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v9.l, v12.l, v4.l, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v10.h, v13.h, v5.h, s3
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v8.l, v11.l, v3.l, s0
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v13.l, v13.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v14.h, v14.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v14.l, v14.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v15.h, v15.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s3, v15.l, v15.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.l, v5.l, v13.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v6.h, v6.h, v14.h, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v6.l, v6.l, v14.l, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v7.h, v7.h, v15.h, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v7.l, v7.l, v15.l, s3
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5.l, v5.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v6.h, v6.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v6.l, v6.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v7.h, v7.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s3, v7.l, v7.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v10.l, v13.l, v5.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v11.h, v14.h, v6.h, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v11.l, v14.l, v6.l, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v12.h, v15.h, v7.h, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v12.l, v15.l, v7.l, s3
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v3, v3, v8
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v4, v4, v9
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v5, v5, v10
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v6, v6, v11
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v7, v7, v12
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_maximum_v16f16:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_pk_max_f16 v16, v7, v15
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v15
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v7
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v7, v15
-; GFX11-FAKE16-NEXT:    v_pk_max_f16 v15, v6, v14
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v16
-; GFX11-FAKE16-NEXT:    v_pk_max_f16 v20, v4, v12
-; GFX11-FAKE16-NEXT:    v_pk_max_f16 v22, v2, v10
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, 0x7e00, v16, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v18, v17
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v14
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v6
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v23, 16, v8
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v24, 16, v0
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v16, 0x7e00, v19, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v14
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v15
-; GFX11-FAKE16-NEXT:    v_pk_max_f16 v14, v5, v13
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-FAKE16-NEXT:    v_perm_b32 v7, v16, v7, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v15, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v18, v17
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v13
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v5
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v15, 0x7e00, v19, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v5, v13
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v14
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-FAKE16-NEXT:    v_perm_b32 v6, v15, v6, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, 0x7e00, v14, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v18, v17
-; GFX11-FAKE16-NEXT:    v_pk_max_f16 v17, v3, v11
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v20
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v13, 0x7e00, v19, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v12
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v11
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v21, 16, v17
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v12, 16, v12
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v14, 0x7e00, v20, vcc_lo
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v20, 16, v3
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v11
-; GFX11-FAKE16-NEXT:    v_perm_b32 v5, v13, v5, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v17, vcc_lo
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v20, v19
-; GFX11-FAKE16-NEXT:    v_pk_max_f16 v19, v1, v9
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v20, 16, v22
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v11, 0x7e00, v21, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v10
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v10
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v11, v3, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v17, 0x7e00, v22, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v9
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v9
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX11-FAKE16-NEXT:    v_pk_max_f16 v22, v0, v8
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v21, 0x7e00, v19, vcc_lo
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v19
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v16, 16, v8
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v9
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v9
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v25, 16, v22
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v19, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v8
-; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v1, v21, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v22, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v24, v23
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v8, 0x7e00, v25, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v10
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v8, v0, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v20, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v12
-; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v2, v17, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v18, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v16, v16
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v17, v17, v16, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8, v8
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v17, v17
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v16, v16, v17, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v19, v19
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v18, v18, v19, vcc_lo
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT:    v_perm_b32 v4, v4, v14, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v18, v18
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v19, v19, v18, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v17, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v10
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v8, v16, v8, 0x5040100
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v16, 16, v2
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v17, v17
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v0, v0, v8
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v16, v16, v17, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v10, v10
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v16, v16
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v18, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v11
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v9, v19, v9, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v17, v17, v16, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v3
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v1, v1, v9
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v10, v10, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v18, v18
+; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v16, v2, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v10, v17, v10, 0x5040100
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v5
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v8, v19, v18, vcc_lo
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v2, v2, v10
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v12
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8, v8
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v16, v18, v8, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v11, v11
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v15
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v10, v10
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v9, v9, v10, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v12, v12
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v11, v11, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v11, v16, v11, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v10, v10, v9, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v16, 16, v13
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v12, v12, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v16, v16
+; GFX11-FAKE16-NEXT:    v_perm_b32 v4, v9, v4, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v9, v17, v16, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v13, v13
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v8, v3, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v8, v10, v12, 0x5040100
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v12, 16, v14
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v6
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, v5, v13, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v7
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v3, v3, v11
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v4, v4, v8
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v16, v16, v9, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v12, v12
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v10, v10, v12, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v13, v13, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v10, v10
+; GFX11-FAKE16-NEXT:    v_perm_b32 v5, v9, v5, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v12, v12, v10, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v14, v14
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v18, v18
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v17, v17, v18, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v15, v15
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX11-FAKE16-NEXT:    v_perm_b32 v13, v16, v13, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v14, v14, v6, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v17, v17
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v5, v5, v13
+; GFX11-FAKE16-NEXT:    v_perm_b32 v9, v12, v14, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v18, v18, v17, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v15, v15, v7, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v6, v10, v6, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v7, v17, v7, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v10, v18, v15, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v6, v6, v9
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v7, v7, v10
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v16f16:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
index 97eafd07d4b37..70b45d8051d8b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
@@ -12,28 +12,31 @@ define float @v_maximum_f32(float %src0, float %src1) {
 ; GFX7-LABEL: v_maximum_f32:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX7-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_f32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_f32:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_f32:
@@ -45,18 +48,22 @@ define float @v_maximum_f32(float %src0, float %src1) {
 ; GFX10-LABEL: v_maximum_f32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_f32:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_f32:
@@ -120,28 +127,31 @@ define float @v_maximum_f32__nsz(float %src0, float %src1) {
 ; GFX7-LABEL: v_maximum_f32__nsz:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX7-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_f32__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_f32__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_f32__nsz:
@@ -153,18 +163,22 @@ define float @v_maximum_f32__nsz(float %src0, float %src1) {
 ; GFX10-LABEL: v_maximum_f32__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_f32__nsz:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_f32__nsz:
@@ -229,30 +243,33 @@ define float @v_maximum_f32__nnan_src0(float %arg0, float %src1) {
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_add_f32_e32 v0, 1.0, v0
-; GFX7-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX7-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_f32__nnan_src0:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_add_f32_e32 v0, 1.0, v0
-; GFX8-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_f32__nnan_src0:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_add_f32_e32 v0, 1.0, v0
-; GFX900-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_f32__nnan_src0:
@@ -266,19 +283,24 @@ define float @v_maximum_f32__nnan_src0(float %arg0, float %src1) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_add_f32_e32 v0, 1.0, v0
-; GFX10-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_f32__nnan_src0:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_add_f32_e32 v0, 1.0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_f32__nnan_src0:
@@ -302,30 +324,27 @@ define float @v_maximum_f32__nnan_src1(float %src0, float %arg1) {
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_add_f32_e32 v1, 1.0, v1
-; GFX7-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX7-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_f32__nnan_src1:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_add_f32_e32 v1, 1.0, v1
-; GFX8-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_f32__nnan_src1:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_add_f32_e32 v1, 1.0, v1
-; GFX900-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_f32__nnan_src1:
@@ -339,19 +358,19 @@ define float @v_maximum_f32__nnan_src1(float %src0, float %arg1) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_add_f32_e32 v1, 1.0, v1
-; GFX10-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_f32__nnan_src1:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_add_f32_e32 v1, 1.0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_f32__nnan_src1:
@@ -374,11 +393,13 @@ define void @s_maximum_f32(float inreg %src0, float inreg %src1) {
 ; GFX7-LABEL: s_maximum_f32:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_mov_b32_e32 v0, s17
-; GFX7-NEXT:    v_max_f32_e32 v1, s16, v0
-; GFX7-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX7-NEXT:    v_mov_b32_e32 v0, s16
+; GFX7-NEXT:    v_mov_b32_e32 v1, s17
+; GFX7-NEXT:    v_cmp_u_f32_e64 vcc, s17, s17
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    ;;#ASMSTART
 ; GFX7-NEXT:    ; use v0
 ; GFX7-NEXT:    ;;#ASMEND
@@ -387,11 +408,13 @@ define void @s_maximum_f32(float inreg %src0, float inreg %src1) {
 ; GFX8-LABEL: s_maximum_f32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, s17
-; GFX8-NEXT:    v_max_f32_e32 v1, s16, v0
-; GFX8-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX8-NEXT:    v_mov_b32_e32 v0, s16
+; GFX8-NEXT:    v_mov_b32_e32 v1, s17
+; GFX8-NEXT:    v_cmp_u_f32_e64 vcc, s17, s17
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    ;;#ASMSTART
 ; GFX8-NEXT:    ; use v0
 ; GFX8-NEXT:    ;;#ASMEND
@@ -400,11 +423,13 @@ define void @s_maximum_f32(float inreg %src0, float inreg %src1) {
 ; GFX900-LABEL: s_maximum_f32:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_mov_b32_e32 v0, s17
-; GFX900-NEXT:    v_max_f32_e32 v1, s16, v0
-; GFX900-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT:    v_mov_b32_e32 v0, s16
+; GFX900-NEXT:    v_mov_b32_e32 v1, s17
+; GFX900-NEXT:    v_cmp_u_f32_e64 vcc, s17, s17
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; use v0
 ; GFX900-NEXT:    ;;#ASMEND
@@ -423,9 +448,12 @@ define void @s_maximum_f32(float inreg %src0, float inreg %src1) {
 ; GFX10-LABEL: s_maximum_f32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e64 v0, s16, s17
-; GFX10-NEXT:    v_cmp_o_f32_e64 vcc_lo, s16, s17
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
+; GFX10-NEXT:    v_mov_b32_e32 v0, s17
+; GFX10-NEXT:    v_cmp_u_f32_e64 vcc_lo, s17, s17
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, s16, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, s17, v0, vcc_lo
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ; use v0
 ; GFX10-NEXT:    ;;#ASMEND
@@ -434,10 +462,14 @@ define void @s_maximum_f32(float inreg %src0, float inreg %src1) {
 ; GFX11-LABEL: s_maximum_f32:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f32_e64 v0, s0, s1
-; GFX11-NEXT:    v_cmp_o_f32_e64 vcc_lo, s0, s1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
+; GFX11-NEXT:    v_mov_b32_e32 v0, s1
+; GFX11-NEXT:    v_cmp_u_f32_e64 vcc_lo, s1, s1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, s0, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, s1, v0, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX11-NEXT:    ;;#ASMSTART
 ; GFX11-NEXT:    ; use v0
 ; GFX11-NEXT:    ;;#ASMEND
@@ -465,37 +497,46 @@ define <2 x float> @v_maximum_v2f32(<2 x float> %src0, <2 x float> %src1) {
 ; GFX7-LABEL: v_maximum_v2f32:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f32_e32 v4, v0, v2
-; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX7-NEXT:    v_max_f32_e32 v2, v1, v3
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX7-NEXT:    v_max_f32_e32 v1, v1, v2
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v2f32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f32_e32 v4, v0, v2
-; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX8-NEXT:    v_max_f32_e32 v2, v1, v3
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX8-NEXT:    v_max_f32_e32 v1, v1, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v2f32:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_max_f32_e32 v4, v0, v2
-; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX900-NEXT:    v_max_f32_e32 v2, v1, v3
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX900-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX900-NEXT:    v_max_f32_e32 v1, v1, v2
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v2f32:
@@ -508,23 +549,32 @@ define <2 x float> @v_maximum_v2f32(<2 x float> %src0, <2 x float> %src1) {
 ; GFX10-LABEL: v_maximum_v2f32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e32 v4, v0, v2
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT:    v_max_f32_e32 v5, v1, v3
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v3
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_max_f32_e32 v1, v1, v3
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_v2f32:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_max_f32 v4, v0, v2 :: v_dual_max_f32 v5, v1, v3
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v1 :: v_dual_max_f32 v0, v0, v2
+; GFX11-NEXT:    v_max_f32_e32 v1, v1, v3
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v2f32:
@@ -594,37 +644,46 @@ define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
 ; GFX7-LABEL: v_maximum_v2f32__nsz:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f32_e32 v4, v0, v2
-; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX7-NEXT:    v_max_f32_e32 v2, v1, v3
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX7-NEXT:    v_max_f32_e32 v1, v1, v2
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v2f32__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f32_e32 v4, v0, v2
-; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX8-NEXT:    v_max_f32_e32 v2, v1, v3
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX8-NEXT:    v_max_f32_e32 v1, v1, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v2f32__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_max_f32_e32 v4, v0, v2
-; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX900-NEXT:    v_max_f32_e32 v2, v1, v3
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX900-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX900-NEXT:    v_max_f32_e32 v1, v1, v2
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v2f32__nsz:
@@ -637,23 +696,32 @@ define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
 ; GFX10-LABEL: v_maximum_v2f32__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e32 v4, v0, v2
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT:    v_max_f32_e32 v5, v1, v3
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v3
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_max_f32_e32 v1, v1, v3
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_v2f32__nsz:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_max_f32 v4, v0, v2 :: v_dual_max_f32 v5, v1, v3
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v1 :: v_dual_max_f32 v0, v0, v2
+; GFX11-NEXT:    v_max_f32_e32 v1, v1, v3
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v2f32__nsz:
@@ -723,15 +791,20 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
 ; GFX7-LABEL: s_maximum_v2f32:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_mov_b32_e32 v0, s19
-; GFX7-NEXT:    v_max_f32_e32 v1, s17, v0
-; GFX7-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, s17, v0
-; GFX7-NEXT:    v_mov_b32_e32 v0, s18
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v3, s16, v0
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX7-NEXT:    v_mov_b32_e32 v0, s17
+; GFX7-NEXT:    v_mov_b32_e32 v1, s19
+; GFX7-NEXT:    v_cmp_u_f32_e64 vcc, s19, s19
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v1, v0, v1
+; GFX7-NEXT:    v_mov_b32_e32 v0, s16
+; GFX7-NEXT:    v_mov_b32_e32 v2, s18
+; GFX7-NEXT:    v_cmp_u_f32_e64 vcc, s18, s18
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v2
 ; GFX7-NEXT:    ;;#ASMSTART
 ; GFX7-NEXT:    ; use v[0:1]
 ; GFX7-NEXT:    ;;#ASMEND
@@ -740,15 +813,20 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
 ; GFX8-LABEL: s_maximum_v2f32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, s19
-; GFX8-NEXT:    v_max_f32_e32 v1, s17, v0
-; GFX8-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, s17, v0
-; GFX8-NEXT:    v_mov_b32_e32 v0, s18
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX8-NEXT:    v_max_f32_e32 v3, s16, v0
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX8-NEXT:    v_mov_b32_e32 v0, s17
+; GFX8-NEXT:    v_mov_b32_e32 v1, s19
+; GFX8-NEXT:    v_cmp_u_f32_e64 vcc, s19, s19
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v1, v0, v1
+; GFX8-NEXT:    v_mov_b32_e32 v0, s16
+; GFX8-NEXT:    v_mov_b32_e32 v2, s18
+; GFX8-NEXT:    v_cmp_u_f32_e64 vcc, s18, s18
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v2
 ; GFX8-NEXT:    ;;#ASMSTART
 ; GFX8-NEXT:    ; use v[0:1]
 ; GFX8-NEXT:    ;;#ASMEND
@@ -757,15 +835,20 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
 ; GFX900-LABEL: s_maximum_v2f32:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_mov_b32_e32 v0, s19
-; GFX900-NEXT:    v_max_f32_e32 v1, s17, v0
-; GFX900-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, s17, v0
-; GFX900-NEXT:    v_mov_b32_e32 v0, s18
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX900-NEXT:    v_max_f32_e32 v3, s16, v0
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX900-NEXT:    v_mov_b32_e32 v0, s17
+; GFX900-NEXT:    v_mov_b32_e32 v1, s19
+; GFX900-NEXT:    v_cmp_u_f32_e64 vcc, s19, s19
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_max_f32_e32 v1, v0, v1
+; GFX900-NEXT:    v_mov_b32_e32 v0, s16
+; GFX900-NEXT:    v_mov_b32_e32 v2, s18
+; GFX900-NEXT:    v_cmp_u_f32_e64 vcc, s18, s18
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX900-NEXT:    v_max_f32_e32 v0, v0, v2
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; use v[0:1]
 ; GFX900-NEXT:    ;;#ASMEND
@@ -786,12 +869,18 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
 ; GFX10-LABEL: s_maximum_v2f32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e64 v0, s17, s19
-; GFX10-NEXT:    v_cmp_o_f32_e64 vcc_lo, s17, s19
-; GFX10-NEXT:    v_max_f32_e64 v2, s16, s18
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v0, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e64 vcc_lo, s16, s18
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX10-NEXT:    v_mov_b32_e32 v0, s19
+; GFX10-NEXT:    v_cmp_u_f32_e64 vcc_lo, s19, s19
+; GFX10-NEXT:    v_mov_b32_e32 v1, s18
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, s17, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e64 vcc_lo, s18, s18
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, s16, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, s19, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_max_f32_e32 v1, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, s18, v2, vcc_lo
+; GFX10-NEXT:    v_max_f32_e32 v0, v2, v3
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ; use v[0:1]
 ; GFX10-NEXT:    ;;#ASMEND
@@ -800,13 +889,20 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
 ; GFX11-LABEL: s_maximum_v2f32:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f32_e64 v0, s1, s3
-; GFX11-NEXT:    v_cmp_o_f32_e64 vcc_lo, s1, s3
-; GFX11-NEXT:    v_max_f32_e64 v2, s0, s2
+; GFX11-NEXT:    v_dual_mov_b32 v0, s3 :: v_dual_mov_b32 v1, s2
+; GFX11-NEXT:    v_cmp_u_f32_e64 vcc_lo, s3, s3
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, s1, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e64 vcc_lo, s2, s2
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, s0, v1, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v0, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e64 vcc_lo, s0, s2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, s3, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_max_f32_e32 v1, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v3, s2, v2, vcc_lo
+; GFX11-NEXT:    v_max_f32_e32 v0, v2, v3
 ; GFX11-NEXT:    ;;#ASMSTART
 ; GFX11-NEXT:    ; use v[0:1]
 ; GFX11-NEXT:    ;;#ASMEND
@@ -835,46 +931,61 @@ define <3 x float> @v_maximum_v3f32(<3 x float> %src0, <3 x float> %src1) {
 ; GFX7-LABEL: v_maximum_v3f32:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f32_e32 v6, v0, v3
-; GFX7-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX7-NEXT:    v_max_f32_e32 v3, v1, v4
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX7-NEXT:    v_max_f32_e32 v3, v2, v5
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX7-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_max_f32_e32 v1, v1, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX7-NEXT:    v_max_f32_e32 v2, v2, v3
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v3f32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f32_e32 v6, v0, v3
-; GFX8-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX8-NEXT:    v_max_f32_e32 v3, v1, v4
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX8-NEXT:    v_max_f32_e32 v3, v2, v5
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX8-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX8-NEXT:    v_max_f32_e32 v1, v1, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX8-NEXT:    v_max_f32_e32 v2, v2, v3
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v3f32:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_max_f32_e32 v6, v0, v3
-; GFX900-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX900-NEXT:    v_max_f32_e32 v3, v1, v4
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX900-NEXT:    v_max_f32_e32 v3, v2, v5
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX900-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX900-NEXT:    v_max_f32_e32 v0, v0, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX900-NEXT:    v_max_f32_e32 v1, v1, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX900-NEXT:    v_max_f32_e32 v2, v2, v3
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v3f32:
@@ -888,29 +999,40 @@ define <3 x float> @v_maximum_v3f32(<3 x float> %src0, <3 x float> %src1) {
 ; GFX10-LABEL: v_maximum_v3f32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e32 v6, v0, v3
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT:    v_max_f32_e32 v7, v1, v4
-; GFX10-NEXT:    v_max_f32_e32 v8, v2, v5
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v4
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v7, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v5
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_max_f32_e32 v1, v1, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v2, vcc_lo
+; GFX10-NEXT:    v_max_f32_e32 v2, v2, v5
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_v3f32:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_max_f32 v6, v0, v3 :: v_dual_max_f32 v7, v1, v4
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT:    v_dual_max_f32 v8, v2, v5 :: v_dual_cndmask_b32 v1, 0x7fc00000, v7
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v5, v5, v2 :: v_dual_max_f32 v0, v0, v3
+; GFX11-NEXT:    v_dual_max_f32 v1, v1, v4 :: v_dual_max_f32 v2, v2, v5
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v3f32:
@@ -987,46 +1109,61 @@ define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
 ; GFX7-LABEL: v_maximum_v3f32__nsz:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f32_e32 v6, v0, v3
-; GFX7-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX7-NEXT:    v_max_f32_e32 v3, v1, v4
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX7-NEXT:    v_max_f32_e32 v3, v2, v5
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX7-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_max_f32_e32 v1, v1, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX7-NEXT:    v_max_f32_e32 v2, v2, v3
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v3f32__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f32_e32 v6, v0, v3
-; GFX8-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX8-NEXT:    v_max_f32_e32 v3, v1, v4
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX8-NEXT:    v_max_f32_e32 v3, v2, v5
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX8-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX8-NEXT:    v_max_f32_e32 v1, v1, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX8-NEXT:    v_max_f32_e32 v2, v2, v3
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v3f32__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_max_f32_e32 v6, v0, v3
-; GFX900-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX900-NEXT:    v_max_f32_e32 v3, v1, v4
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX900-NEXT:    v_max_f32_e32 v3, v2, v5
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX900-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX900-NEXT:    v_max_f32_e32 v0, v0, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX900-NEXT:    v_max_f32_e32 v1, v1, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX900-NEXT:    v_max_f32_e32 v2, v2, v3
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v3f32__nsz:
@@ -1040,29 +1177,40 @@ define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
 ; GFX10-LABEL: v_maximum_v3f32__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e32 v6, v0, v3
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT:    v_max_f32_e32 v7, v1, v4
-; GFX10-NEXT:    v_max_f32_e32 v8, v2, v5
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v4
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v7, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v5
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_max_f32_e32 v1, v1, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v2, vcc_lo
+; GFX10-NEXT:    v_max_f32_e32 v2, v2, v5
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_v3f32__nsz:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_max_f32 v6, v0, v3 :: v_dual_max_f32 v7, v1, v4
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT:    v_dual_max_f32 v8, v2, v5 :: v_dual_cndmask_b32 v1, 0x7fc00000, v7
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v5, v5, v2 :: v_dual_max_f32 v0, v0, v3
+; GFX11-NEXT:    v_dual_max_f32 v1, v1, v4 :: v_dual_max_f32 v2, v2, v5
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v3f32__nsz:
@@ -1139,55 +1287,76 @@ define <4 x float> @v_maximum_v4f32(<4 x float> %src0, <4 x float> %src1) {
 ; GFX7-LABEL: v_maximum_v4f32:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f32_e32 v8, v0, v4
-; GFX7-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX7-NEXT:    v_max_f32_e32 v4, v1, v5
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX7-NEXT:    v_max_f32_e32 v4, v2, v6
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
-; GFX7-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX7-NEXT:    v_max_f32_e32 v4, v3, v7
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
-; GFX7-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_max_f32_e32 v1, v1, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX7-NEXT:    v_max_f32_e32 v3, v3, v4
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v4f32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f32_e32 v8, v0, v4
-; GFX8-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX8-NEXT:    v_max_f32_e32 v4, v1, v5
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX8-NEXT:    v_max_f32_e32 v4, v2, v6
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
-; GFX8-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX8-NEXT:    v_max_f32_e32 v4, v3, v7
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
-; GFX8-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX8-NEXT:    v_max_f32_e32 v1, v1, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX8-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX8-NEXT:    v_max_f32_e32 v3, v3, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v4f32:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_max_f32_e32 v8, v0, v4
-; GFX900-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX900-NEXT:    v_max_f32_e32 v4, v1, v5
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX900-NEXT:    v_max_f32_e32 v4, v2, v6
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
-; GFX900-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX900-NEXT:    v_max_f32_e32 v4, v3, v7
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
-; GFX900-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX900-NEXT:    v_max_f32_e32 v0, v0, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX900-NEXT:    v_max_f32_e32 v1, v1, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX900-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX900-NEXT:    v_max_f32_e32 v3, v3, v4
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v4f32:
@@ -1202,35 +1371,52 @@ define <4 x float> @v_maximum_v4f32(<4 x float> %src0, <4 x float> %src1) {
 ; GFX10-LABEL: v_maximum_v4f32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e32 v8, v0, v4
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT:    v_max_f32_e32 v9, v1, v5
-; GFX10-NEXT:    v_max_f32_e32 v4, v2, v6
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT:    v_max_f32_e32 v8, v3, v7
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v9, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT:    v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_max_f32_e32 v1, v1, v5
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_max_f32_e32 v2, v2, v6
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v3, vcc_lo
+; GFX10-NEXT:    v_max_f32_e32 v3, v3, v7
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_v4f32:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_max_f32 v8, v0, v4 :: v_dual_max_f32 v9, v1, v5
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT:    v_max_f32_e32 v4, v2, v6
-; GFX11-NEXT:    v_dual_max_f32 v8, v3, v7 :: v_dual_cndmask_b32 v1, 0x7fc00000, v9
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v6, v6
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v7, v7
+; GFX11-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5
+; GFX11-NEXT:    v_cndmask_b32_e32 v6, v6, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX11-NEXT:    v_dual_cndmask_b32 v7, v7, v3 :: v_dual_max_f32 v2, v2, v6
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_max_f32_e32 v3, v3, v7
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v4f32:
@@ -1313,55 +1499,76 @@ define <4 x float> @v_maximum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
 ; GFX7-LABEL: v_maximum_v4f32__nsz:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f32_e32 v8, v0, v4
-; GFX7-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX7-NEXT:    v_max_f32_e32 v4, v1, v5
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX7-NEXT:    v_max_f32_e32 v4, v2, v6
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
-; GFX7-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX7-NEXT:    v_max_f32_e32 v4, v3, v7
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
-; GFX7-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_max_f32_e32 v1, v1, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX7-NEXT:    v_max_f32_e32 v3, v3, v4
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v4f32__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f32_e32 v8, v0, v4
-; GFX8-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX8-NEXT:    v_max_f32_e32 v4, v1, v5
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX8-NEXT:    v_max_f32_e32 v4, v2, v6
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
-; GFX8-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX8-NEXT:    v_max_f32_e32 v4, v3, v7
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
-; GFX8-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX8-NEXT:    v_max_f32_e32 v1, v1, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX8-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX8-NEXT:    v_max_f32_e32 v3, v3, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v4f32__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_max_f32_e32 v8, v0, v4
-; GFX900-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX900-NEXT:    v_max_f32_e32 v4, v1, v5
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX900-NEXT:    v_max_f32_e32 v4, v2, v6
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
-; GFX900-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX900-NEXT:    v_max_f32_e32 v4, v3, v7
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
-; GFX900-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX900-NEXT:    v_max_f32_e32 v0, v0, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX900-NEXT:    v_max_f32_e32 v1, v1, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX900-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX900-NEXT:    v_max_f32_e32 v3, v3, v4
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v4f32__nsz:
@@ -1376,35 +1583,52 @@ define <4 x float> @v_maximum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
 ; GFX10-LABEL: v_maximum_v4f32__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e32 v8, v0, v4
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT:    v_max_f32_e32 v9, v1, v5
-; GFX10-NEXT:    v_max_f32_e32 v4, v2, v6
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT:    v_max_f32_e32 v8, v3, v7
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v9, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT:    v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_max_f32_e32 v1, v1, v5
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_max_f32_e32 v2, v2, v6
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v3, vcc_lo
+; GFX10-NEXT:    v_max_f32_e32 v3, v3, v7
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_v4f32__nsz:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_max_f32 v8, v0, v4 :: v_dual_max_f32 v9, v1, v5
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT:    v_max_f32_e32 v4, v2, v6
-; GFX11-NEXT:    v_dual_max_f32 v8, v3, v7 :: v_dual_cndmask_b32 v1, 0x7fc00000, v9
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v6, v6
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v7, v7
+; GFX11-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5
+; GFX11-NEXT:    v_cndmask_b32_e32 v6, v6, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX11-NEXT:    v_dual_cndmask_b32 v7, v7, v3 :: v_dual_max_f32 v2, v2, v6
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_max_f32_e32 v3, v3, v7
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v4f32__nsz:
@@ -1487,91 +1711,136 @@ define <8 x float> @v_maximum_v8f32(<8 x float> %src0, <8 x float> %src1) {
 ; GFX7-LABEL: v_maximum_v8f32:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f32_e32 v16, v0, v8
-; GFX7-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v8
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX7-NEXT:    v_max_f32_e32 v8, v1, v9
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v9
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v17, v8, vcc
-; GFX7-NEXT:    v_max_f32_e32 v8, v2, v10
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v10
-; GFX7-NEXT:    v_cndmask_b32_e32 v2, v17, v8, vcc
-; GFX7-NEXT:    v_max_f32_e32 v8, v3, v11
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v3, v11
-; GFX7-NEXT:    v_cndmask_b32_e32 v3, v17, v8, vcc
-; GFX7-NEXT:    v_max_f32_e32 v8, v4, v12
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v4, v12
-; GFX7-NEXT:    v_cndmask_b32_e32 v4, v17, v8, vcc
-; GFX7-NEXT:    v_max_f32_e32 v8, v5, v13
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v5, v13
-; GFX7-NEXT:    v_cndmask_b32_e32 v5, v17, v8, vcc
-; GFX7-NEXT:    v_max_f32_e32 v8, v6, v14
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v6, v14
-; GFX7-NEXT:    v_cndmask_b32_e32 v6, v17, v8, vcc
-; GFX7-NEXT:    v_max_f32_e32 v8, v7, v15
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v7, v15
-; GFX7-NEXT:    v_cndmask_b32_e32 v7, v17, v8, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v9, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_max_f32_e32 v1, v1, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v10, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_max_f32_e32 v2, v2, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v11, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_max_f32_e32 v3, v3, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v12, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
+; GFX7-NEXT:    v_cndmask_b32_e32 v5, v5, v13, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_max_f32_e32 v4, v4, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v13, v5, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
+; GFX7-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX7-NEXT:    v_max_f32_e32 v5, v5, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
+; GFX7-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX7-NEXT:    v_max_f32_e32 v6, v6, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v15, v7, vcc
+; GFX7-NEXT:    v_max_f32_e32 v7, v7, v8
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v8f32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f32_e32 v16, v0, v8
-; GFX8-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v8
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX8-NEXT:    v_max_f32_e32 v8, v1, v9
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v1, v9
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v17, v8, vcc
-; GFX8-NEXT:    v_max_f32_e32 v8, v2, v10
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v2, v10
-; GFX8-NEXT:    v_cndmask_b32_e32 v2, v17, v8, vcc
-; GFX8-NEXT:    v_max_f32_e32 v8, v3, v11
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v3, v11
-; GFX8-NEXT:    v_cndmask_b32_e32 v3, v17, v8, vcc
-; GFX8-NEXT:    v_max_f32_e32 v8, v4, v12
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v4, v12
-; GFX8-NEXT:    v_cndmask_b32_e32 v4, v17, v8, vcc
-; GFX8-NEXT:    v_max_f32_e32 v8, v5, v13
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v5, v13
-; GFX8-NEXT:    v_cndmask_b32_e32 v5, v17, v8, vcc
-; GFX8-NEXT:    v_max_f32_e32 v8, v6, v14
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v6, v14
-; GFX8-NEXT:    v_cndmask_b32_e32 v6, v17, v8, vcc
-; GFX8-NEXT:    v_max_f32_e32 v8, v7, v15
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v7, v15
-; GFX8-NEXT:    v_cndmask_b32_e32 v7, v17, v8, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v8
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v9, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX8-NEXT:    v_max_f32_e32 v1, v1, v8
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v10, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX8-NEXT:    v_max_f32_e32 v2, v2, v8
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v11, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX8-NEXT:    v_max_f32_e32 v3, v3, v8
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v12, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
+; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v13, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX8-NEXT:    v_max_f32_e32 v4, v4, v8
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v13, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX8-NEXT:    v_max_f32_e32 v5, v5, v8
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
+; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX8-NEXT:    v_max_f32_e32 v6, v6, v8
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v15, v7, vcc
+; GFX8-NEXT:    v_max_f32_e32 v7, v7, v8
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v8f32:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_max_f32_e32 v16, v0, v8
-; GFX900-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v8
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX900-NEXT:    v_max_f32_e32 v8, v1, v9
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v1, v9
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v17, v8, vcc
-; GFX900-NEXT:    v_max_f32_e32 v8, v2, v10
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v2, v10
-; GFX900-NEXT:    v_cndmask_b32_e32 v2, v17, v8, vcc
-; GFX900-NEXT:    v_max_f32_e32 v8, v3, v11
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v3, v11
-; GFX900-NEXT:    v_cndmask_b32_e32 v3, v17, v8, vcc
-; GFX900-NEXT:    v_max_f32_e32 v8, v4, v12
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v4, v12
-; GFX900-NEXT:    v_cndmask_b32_e32 v4, v17, v8, vcc
-; GFX900-NEXT:    v_max_f32_e32 v8, v5, v13
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v5, v13
-; GFX900-NEXT:    v_cndmask_b32_e32 v5, v17, v8, vcc
-; GFX900-NEXT:    v_max_f32_e32 v8, v6, v14
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v6, v14
-; GFX900-NEXT:    v_cndmask_b32_e32 v6, v17, v8, vcc
-; GFX900-NEXT:    v_max_f32_e32 v8, v7, v15
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v7, v15
-; GFX900-NEXT:    v_cndmask_b32_e32 v7, v17, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX900-NEXT:    v_max_f32_e32 v0, v0, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v9, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX900-NEXT:    v_max_f32_e32 v1, v1, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v10, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX900-NEXT:    v_max_f32_e32 v2, v2, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v11, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX900-NEXT:    v_max_f32_e32 v3, v3, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v12, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
+; GFX900-NEXT:    v_cndmask_b32_e32 v5, v5, v13, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX900-NEXT:    v_max_f32_e32 v4, v4, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v13, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
+; GFX900-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX900-NEXT:    v_max_f32_e32 v5, v5, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
+; GFX900-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX900-NEXT:    v_max_f32_e32 v6, v6, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v15, v7, vcc
+; GFX900-NEXT:    v_max_f32_e32 v7, v7, v8
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v8f32:
@@ -1590,57 +1859,94 @@ define <8 x float> @v_maximum_v8f32(<8 x float> %src0, <8 x float> %src1) {
 ; GFX10-LABEL: v_maximum_v8f32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e32 v16, v0, v8
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v8
-; GFX10-NEXT:    v_max_f32_e32 v17, v1, v9
-; GFX10-NEXT:    v_max_f32_e32 v8, v2, v10
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v16, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v9
-; GFX10-NEXT:    v_max_f32_e32 v9, v3, v11
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v17, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v10
-; GFX10-NEXT:    v_max_f32_e32 v10, v7, v15
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v3, v11
-; GFX10-NEXT:    v_max_f32_e32 v8, v4, v12
-; GFX10-NEXT:    v_cndmask_b32_e32 v3, 0x7fc00000, v9, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v4, v12
-; GFX10-NEXT:    v_max_f32_e32 v9, v5, v13
-; GFX10-NEXT:    v_cndmask_b32_e32 v4, 0x7fc00000, v8, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v5, v13
-; GFX10-NEXT:    v_max_f32_e32 v8, v6, v14
-; GFX10-NEXT:    v_cndmask_b32_e32 v5, 0x7fc00000, v9, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v6, v14
-; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0x7fc00000, v8, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v7, v15
-; GFX10-NEXT:    v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v8, v8
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v9, v9
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v8
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v10, v10
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v8, v9, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_max_f32_e32 v1, v1, v8
+; GFX10-NEXT:    v_cndmask_b32_e32 v9, v10, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v11, v11
+; GFX10-NEXT:    v_max_f32_e32 v2, v2, v9
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v12, v12
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v8, v11, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_max_f32_e32 v3, v3, v8
+; GFX10-NEXT:    v_cndmask_b32_e32 v9, v12, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v13, v13
+; GFX10-NEXT:    v_max_f32_e32 v4, v4, v9
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v13, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v14, v14
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v15, v15
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_cndmask_b32_e32 v10, v13, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_max_f32_e32 v5, v5, v10
+; GFX10-NEXT:    v_cndmask_b32_e32 v11, v14, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_max_f32_e32 v6, v6, v11
+; GFX10-NEXT:    v_cndmask_b32_e32 v12, v15, v7, vcc_lo
+; GFX10-NEXT:    v_max_f32_e32 v7, v7, v12
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_v8f32:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_max_f32 v16, v0, v8 :: v_dual_max_f32 v17, v1, v9
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v8
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v16, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v9
-; GFX11-NEXT:    v_dual_max_f32 v8, v2, v10 :: v_dual_max_f32 v9, v3, v11
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v17, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v10
-; GFX11-NEXT:    v_max_f32_e32 v10, v7, v15
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v3, v11
-; GFX11-NEXT:    v_dual_max_f32 v8, v4, v12 :: v_dual_cndmask_b32 v3, 0x7fc00000, v9
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v4, v12
-; GFX11-NEXT:    v_dual_max_f32 v9, v5, v13 :: v_dual_cndmask_b32 v4, 0x7fc00000, v8
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v5, v13
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_dual_max_f32 v8, v6, v14 :: v_dual_cndmask_b32 v5, 0x7fc00000, v9
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v6, v14
-; GFX11-NEXT:    v_cndmask_b32_e32 v6, 0x7fc00000, v8, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v7, v15
-; GFX11-NEXT:    v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v8, v8
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v9, v9
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v10, v10
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v8
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v8, v9, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    v_cndmask_b32_e32 v9, v10, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v11, v11
+; GFX11-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v12, v12
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_dual_max_f32 v1, v1, v8 :: v_dual_max_f32 v2, v2, v9
+; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX11-NEXT:    v_cndmask_b32_e32 v8, v11, v3, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX11-NEXT:    v_cndmask_b32_e32 v9, v12, v4, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v13, v13
+; GFX11-NEXT:    v_cndmask_b32_e32 v5, v5, v13, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v14, v14
+; GFX11-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v15, v15
+; GFX11-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX11-NEXT:    v_max_f32_e32 v4, v4, v9
+; GFX11-NEXT:    v_cndmask_b32_e32 v10, v13, v5, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v6, v6
+; GFX11-NEXT:    v_cndmask_b32_e32 v11, v14, v6, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v7, v7
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_max_f32 v3, v3, v8 :: v_dual_max_f32 v6, v6, v11
+; GFX11-NEXT:    v_cndmask_b32_e32 v12, v15, v7, vcc_lo
+; GFX11-NEXT:    v_max_f32_e32 v7, v7, v12
+; GFX11-NEXT:    v_max_f32_e32 v5, v5, v10
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v8f32:
@@ -1667,169 +1973,262 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
 ; GFX7-LABEL: v_maximum_v16f32:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v16
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v16, v16
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v16, v16, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v17, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX7-NEXT:    v_max_f32_e32 v0, v0, v16
+; GFX7-NEXT:    v_cndmask_b32_e32 v16, v17, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v18, v18
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v18, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_max_f32_e32 v1, v1, v16
+; GFX7-NEXT:    v_cndmask_b32_e32 v16, v18, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v19, v19
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v19, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_max_f32_e32 v2, v2, v16
+; GFX7-NEXT:    v_cndmask_b32_e32 v16, v19, v3, vcc
+; GFX7-NEXT:    v_max_f32_e32 v3, v3, v16
 ; GFX7-NEXT:    buffer_load_dword v16, off, s[0:3], s32
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[4:5], v1, v17
-; GFX7-NEXT:    v_max_f32_e32 v1, v1, v17
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[6:7], v2, v18
-; GFX7-NEXT:    v_max_f32_e32 v2, v2, v18
-; GFX7-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
-; GFX7-NEXT:    v_max_f32_e32 v18, v13, v29
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[28:29], v13, v29
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[8:9], v3, v19
-; GFX7-NEXT:    v_max_f32_e32 v3, v3, v19
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[10:11], v4, v20
-; GFX7-NEXT:    v_max_f32_e32 v4, v4, v20
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[12:13], v5, v21
-; GFX7-NEXT:    v_max_f32_e32 v5, v5, v21
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[14:15], v6, v22
-; GFX7-NEXT:    v_max_f32_e32 v6, v6, v22
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[16:17], v7, v23
-; GFX7-NEXT:    v_max_f32_e32 v7, v7, v23
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[18:19], v8, v24
-; GFX7-NEXT:    v_max_f32_e32 v8, v8, v24
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[20:21], v9, v25
-; GFX7-NEXT:    v_max_f32_e32 v9, v9, v25
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[22:23], v10, v26
-; GFX7-NEXT:    v_max_f32_e32 v10, v10, v26
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[24:25], v11, v27
-; GFX7-NEXT:    v_max_f32_e32 v11, v11, v27
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[26:27], v12, v28
-; GFX7-NEXT:    v_max_f32_e32 v12, v12, v28
-; GFX7-NEXT:    v_max_f32_e32 v19, v14, v30
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[40:41], v14, v30
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v13, v17, v18, s[28:29]
-; GFX7-NEXT:    v_cndmask_b32_e64 v1, v17, v1, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v17, v2, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v17, v3, s[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v17, v4, s[10:11]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v17, v5, s[12:13]
-; GFX7-NEXT:    v_cndmask_b32_e64 v6, v17, v6, s[14:15]
-; GFX7-NEXT:    v_cndmask_b32_e64 v7, v17, v7, s[16:17]
-; GFX7-NEXT:    v_cndmask_b32_e64 v8, v17, v8, s[18:19]
-; GFX7-NEXT:    v_cndmask_b32_e64 v9, v17, v9, s[20:21]
-; GFX7-NEXT:    v_cndmask_b32_e64 v10, v17, v10, s[22:23]
-; GFX7-NEXT:    v_cndmask_b32_e64 v11, v17, v11, s[24:25]
-; GFX7-NEXT:    v_cndmask_b32_e64 v12, v17, v12, s[26:27]
-; GFX7-NEXT:    v_cndmask_b32_e64 v14, v17, v19, s[40:41]
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v20, v20
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v4, v20, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v20, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v21, v21
+; GFX7-NEXT:    v_cndmask_b32_e32 v5, v5, v21, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_max_f32_e32 v4, v4, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v21, v5, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v22, v22
+; GFX7-NEXT:    v_cndmask_b32_e32 v6, v6, v22, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX7-NEXT:    v_max_f32_e32 v5, v5, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v22, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v23, v23
+; GFX7-NEXT:    v_cndmask_b32_e32 v7, v7, v23, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX7-NEXT:    v_max_f32_e32 v6, v6, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v23, v7, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v24, v24
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v8, v24, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
+; GFX7-NEXT:    v_max_f32_e32 v7, v7, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v24, v8, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v25, v25
+; GFX7-NEXT:    v_cndmask_b32_e32 v9, v9, v25, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
+; GFX7-NEXT:    v_max_f32_e32 v8, v8, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v25, v9, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; GFX7-NEXT:    v_cndmask_b32_e32 v10, v10, v26, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
+; GFX7-NEXT:    v_max_f32_e32 v9, v9, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v26, v10, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v27, v27
+; GFX7-NEXT:    v_cndmask_b32_e32 v11, v11, v27, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
+; GFX7-NEXT:    v_max_f32_e32 v10, v10, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v27, v11, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v28, v28
+; GFX7-NEXT:    v_cndmask_b32_e32 v12, v12, v28, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
+; GFX7-NEXT:    v_max_f32_e32 v11, v11, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v28, v12, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v29, v29
+; GFX7-NEXT:    v_cndmask_b32_e32 v13, v13, v29, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
+; GFX7-NEXT:    v_max_f32_e32 v12, v12, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v29, v13, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v30, v30
+; GFX7-NEXT:    v_cndmask_b32_e32 v14, v14, v30, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
+; GFX7-NEXT:    v_max_f32_e32 v13, v13, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v30, v14, vcc
+; GFX7-NEXT:    v_max_f32_e32 v14, v14, v17
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_max_f32_e32 v18, v15, v16
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v15, v16
-; GFX7-NEXT:    v_cndmask_b32_e32 v15, v17, v18, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v16, v16
+; GFX7-NEXT:    v_cndmask_b32_e32 v15, v15, v16, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
+; GFX7-NEXT:    v_cndmask_b32_e32 v16, v16, v15, vcc
+; GFX7-NEXT:    v_max_f32_e32 v15, v15, v16
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v16f32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v16
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v16, v16
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v16, v16, v0, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v17, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX8-NEXT:    v_max_f32_e32 v0, v0, v16
+; GFX8-NEXT:    v_cndmask_b32_e32 v16, v17, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v18, v18
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v18, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX8-NEXT:    v_max_f32_e32 v1, v1, v16
+; GFX8-NEXT:    v_cndmask_b32_e32 v16, v18, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v19, v19
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v19, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX8-NEXT:    v_max_f32_e32 v2, v2, v16
+; GFX8-NEXT:    v_cndmask_b32_e32 v16, v19, v3, vcc
+; GFX8-NEXT:    v_max_f32_e32 v3, v3, v16
 ; GFX8-NEXT:    buffer_load_dword v16, off, s[0:3], s32
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[4:5], v1, v17
-; GFX8-NEXT:    v_max_f32_e32 v1, v1, v17
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[6:7], v2, v18
-; GFX8-NEXT:    v_max_f32_e32 v2, v2, v18
-; GFX8-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
-; GFX8-NEXT:    v_max_f32_e32 v18, v13, v29
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[28:29], v13, v29
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[8:9], v3, v19
-; GFX8-NEXT:    v_max_f32_e32 v3, v3, v19
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[10:11], v4, v20
-; GFX8-NEXT:    v_max_f32_e32 v4, v4, v20
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[12:13], v5, v21
-; GFX8-NEXT:    v_max_f32_e32 v5, v5, v21
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[14:15], v6, v22
-; GFX8-NEXT:    v_max_f32_e32 v6, v6, v22
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[16:17], v7, v23
-; GFX8-NEXT:    v_max_f32_e32 v7, v7, v23
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[18:19], v8, v24
-; GFX8-NEXT:    v_max_f32_e32 v8, v8, v24
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[20:21], v9, v25
-; GFX8-NEXT:    v_max_f32_e32 v9, v9, v25
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[22:23], v10, v26
-; GFX8-NEXT:    v_max_f32_e32 v10, v10, v26
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[24:25], v11, v27
-; GFX8-NEXT:    v_max_f32_e32 v11, v11, v27
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[26:27], v12, v28
-; GFX8-NEXT:    v_max_f32_e32 v12, v12, v28
-; GFX8-NEXT:    v_max_f32_e32 v19, v14, v30
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[40:41], v14, v30
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v13, v17, v18, s[28:29]
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, v17, v1, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v17, v2, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v17, v3, s[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v17, v4, s[10:11]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v17, v5, s[12:13]
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, v17, v6, s[14:15]
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, v17, v7, s[16:17]
-; GFX8-NEXT:    v_cndmask_b32_e64 v8, v17, v8, s[18:19]
-; GFX8-NEXT:    v_cndmask_b32_e64 v9, v17, v9, s[20:21]
-; GFX8-NEXT:    v_cndmask_b32_e64 v10, v17, v10, s[22:23]
-; GFX8-NEXT:    v_cndmask_b32_e64 v11, v17, v11, s[24:25]
-; GFX8-NEXT:    v_cndmask_b32_e64 v12, v17, v12, s[26:27]
-; GFX8-NEXT:    v_cndmask_b32_e64 v14, v17, v19, s[40:41]
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v20, v20
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v20, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v20, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v21, v21
+; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v21, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX8-NEXT:    v_max_f32_e32 v4, v4, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v21, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v22, v22
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v22, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX8-NEXT:    v_max_f32_e32 v5, v5, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v22, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v23, v23
+; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v23, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX8-NEXT:    v_max_f32_e32 v6, v6, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v23, v7, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v24, v24
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v8, v24, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
+; GFX8-NEXT:    v_max_f32_e32 v7, v7, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v24, v8, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v25, v25
+; GFX8-NEXT:    v_cndmask_b32_e32 v9, v9, v25, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
+; GFX8-NEXT:    v_max_f32_e32 v8, v8, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v25, v9, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; GFX8-NEXT:    v_cndmask_b32_e32 v10, v10, v26, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
+; GFX8-NEXT:    v_max_f32_e32 v9, v9, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v26, v10, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v27, v27
+; GFX8-NEXT:    v_cndmask_b32_e32 v11, v11, v27, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
+; GFX8-NEXT:    v_max_f32_e32 v10, v10, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v27, v11, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v28, v28
+; GFX8-NEXT:    v_cndmask_b32_e32 v12, v12, v28, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
+; GFX8-NEXT:    v_max_f32_e32 v11, v11, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v28, v12, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v29, v29
+; GFX8-NEXT:    v_cndmask_b32_e32 v13, v13, v29, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
+; GFX8-NEXT:    v_max_f32_e32 v12, v12, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v29, v13, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v30, v30
+; GFX8-NEXT:    v_cndmask_b32_e32 v14, v14, v30, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
+; GFX8-NEXT:    v_max_f32_e32 v13, v13, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v30, v14, vcc
+; GFX8-NEXT:    v_max_f32_e32 v14, v14, v17
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_max_f32_e32 v18, v15, v16
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v15, v16
-; GFX8-NEXT:    v_cndmask_b32_e32 v15, v17, v18, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v16, v16
+; GFX8-NEXT:    v_cndmask_b32_e32 v15, v15, v16, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
+; GFX8-NEXT:    v_cndmask_b32_e32 v16, v16, v15, vcc
+; GFX8-NEXT:    v_max_f32_e32 v15, v15, v16
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v16f32:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v16
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v16, v16
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v16, v16, v0, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v17, v17
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX900-NEXT:    v_max_f32_e32 v0, v0, v16
+; GFX900-NEXT:    v_cndmask_b32_e32 v16, v17, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v18, v18
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v18, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX900-NEXT:    v_max_f32_e32 v1, v1, v16
+; GFX900-NEXT:    v_cndmask_b32_e32 v16, v18, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v19, v19
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v19, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX900-NEXT:    v_max_f32_e32 v2, v2, v16
+; GFX900-NEXT:    v_cndmask_b32_e32 v16, v19, v3, vcc
+; GFX900-NEXT:    v_max_f32_e32 v3, v3, v16
 ; GFX900-NEXT:    buffer_load_dword v16, off, s[0:3], s32
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[4:5], v1, v17
-; GFX900-NEXT:    v_max_f32_e32 v1, v1, v17
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[6:7], v2, v18
-; GFX900-NEXT:    v_max_f32_e32 v2, v2, v18
-; GFX900-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
-; GFX900-NEXT:    v_max_f32_e32 v18, v13, v29
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[28:29], v13, v29
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[8:9], v3, v19
-; GFX900-NEXT:    v_max_f32_e32 v3, v3, v19
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[10:11], v4, v20
-; GFX900-NEXT:    v_max_f32_e32 v4, v4, v20
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[12:13], v5, v21
-; GFX900-NEXT:    v_max_f32_e32 v5, v5, v21
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[14:15], v6, v22
-; GFX900-NEXT:    v_max_f32_e32 v6, v6, v22
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[16:17], v7, v23
-; GFX900-NEXT:    v_max_f32_e32 v7, v7, v23
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[18:19], v8, v24
-; GFX900-NEXT:    v_max_f32_e32 v8, v8, v24
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[20:21], v9, v25
-; GFX900-NEXT:    v_max_f32_e32 v9, v9, v25
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[22:23], v10, v26
-; GFX900-NEXT:    v_max_f32_e32 v10, v10, v26
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[24:25], v11, v27
-; GFX900-NEXT:    v_max_f32_e32 v11, v11, v27
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[26:27], v12, v28
-; GFX900-NEXT:    v_max_f32_e32 v12, v12, v28
-; GFX900-NEXT:    v_max_f32_e32 v19, v14, v30
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[40:41], v14, v30
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v17, v0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v13, v17, v18, s[28:29]
-; GFX900-NEXT:    v_cndmask_b32_e64 v1, v17, v1, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v17, v2, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v17, v3, s[8:9]
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v17, v4, s[10:11]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v17, v5, s[12:13]
-; GFX900-NEXT:    v_cndmask_b32_e64 v6, v17, v6, s[14:15]
-; GFX900-NEXT:    v_cndmask_b32_e64 v7, v17, v7, s[16:17]
-; GFX900-NEXT:    v_cndmask_b32_e64 v8, v17, v8, s[18:19]
-; GFX900-NEXT:    v_cndmask_b32_e64 v9, v17, v9, s[20:21]
-; GFX900-NEXT:    v_cndmask_b32_e64 v10, v17, v10, s[22:23]
-; GFX900-NEXT:    v_cndmask_b32_e64 v11, v17, v11, s[24:25]
-; GFX900-NEXT:    v_cndmask_b32_e64 v12, v17, v12, s[26:27]
-; GFX900-NEXT:    v_cndmask_b32_e64 v14, v17, v19, s[40:41]
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v20, v20
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v20, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v20, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v21, v21
+; GFX900-NEXT:    v_cndmask_b32_e32 v5, v5, v21, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX900-NEXT:    v_max_f32_e32 v4, v4, v17
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v21, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v22, v22
+; GFX900-NEXT:    v_cndmask_b32_e32 v6, v6, v22, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX900-NEXT:    v_max_f32_e32 v5, v5, v17
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v22, v6, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v23, v23
+; GFX900-NEXT:    v_cndmask_b32_e32 v7, v7, v23, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX900-NEXT:    v_max_f32_e32 v6, v6, v17
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v23, v7, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v24, v24
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v24, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
+; GFX900-NEXT:    v_max_f32_e32 v7, v7, v17
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v24, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v25, v25
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v25, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
+; GFX900-NEXT:    v_max_f32_e32 v8, v8, v17
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v25, v9, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; GFX900-NEXT:    v_cndmask_b32_e32 v10, v10, v26, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
+; GFX900-NEXT:    v_max_f32_e32 v9, v9, v17
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v26, v10, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v27, v27
+; GFX900-NEXT:    v_cndmask_b32_e32 v11, v11, v27, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
+; GFX900-NEXT:    v_max_f32_e32 v10, v10, v17
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v27, v11, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v28, v28
+; GFX900-NEXT:    v_cndmask_b32_e32 v12, v12, v28, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
+; GFX900-NEXT:    v_max_f32_e32 v11, v11, v17
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v28, v12, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v29, v29
+; GFX900-NEXT:    v_cndmask_b32_e32 v13, v13, v29, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
+; GFX900-NEXT:    v_max_f32_e32 v12, v12, v17
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v29, v13, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v30, v30
+; GFX900-NEXT:    v_cndmask_b32_e32 v14, v14, v30, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
+; GFX900-NEXT:    v_max_f32_e32 v13, v13, v17
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v30, v14, vcc
+; GFX900-NEXT:    v_max_f32_e32 v14, v14, v17
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_max_f32_e32 v18, v15, v16
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v15, v16
-; GFX900-NEXT:    v_cndmask_b32_e32 v15, v17, v18, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v16, v16
+; GFX900-NEXT:    v_cndmask_b32_e32 v15, v15, v16, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
+; GFX900-NEXT:    v_cndmask_b32_e32 v16, v16, v15, vcc
+; GFX900-NEXT:    v_max_f32_e32 v15, v15, v16
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v16f32:
@@ -1859,105 +2258,169 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX10-NEXT:    v_max_f32_e32 v32, v0, v16
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v16
-; GFX10-NEXT:    v_max_f32_e32 v33, v1, v17
-; GFX10-NEXT:    v_max_f32_e32 v34, v2, v18
-; GFX10-NEXT:    v_max_f32_e32 v35, v3, v19
-; GFX10-NEXT:    v_max_f32_e32 v36, v4, v20
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v32, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v17
-; GFX10-NEXT:    v_max_f32_e32 v37, v5, v21
-; GFX10-NEXT:    v_max_f32_e32 v38, v6, v22
-; GFX10-NEXT:    v_max_f32_e32 v39, v7, v23
-; GFX10-NEXT:    v_max_f32_e32 v48, v8, v24
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v33, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v18
-; GFX10-NEXT:    v_max_f32_e32 v49, v9, v25
-; GFX10-NEXT:    v_max_f32_e32 v50, v10, v26
-; GFX10-NEXT:    v_max_f32_e32 v51, v11, v27
-; GFX10-NEXT:    v_max_f32_e32 v52, v12, v28
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v34, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v3, v19
-; GFX10-NEXT:    v_max_f32_e32 v53, v13, v29
-; GFX10-NEXT:    v_max_f32_e32 v54, v14, v30
-; GFX10-NEXT:    v_cndmask_b32_e32 v3, 0x7fc00000, v35, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v4, v20
-; GFX10-NEXT:    v_cndmask_b32_e32 v4, 0x7fc00000, v36, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v5, v21
-; GFX10-NEXT:    v_cndmask_b32_e32 v5, 0x7fc00000, v37, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v6, v22
-; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0x7fc00000, v38, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v7, v23
-; GFX10-NEXT:    v_cndmask_b32_e32 v7, 0x7fc00000, v39, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v8, v24
-; GFX10-NEXT:    v_cndmask_b32_e32 v8, 0x7fc00000, v48, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v9, v25
-; GFX10-NEXT:    v_cndmask_b32_e32 v9, 0x7fc00000, v49, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v10, v26
-; GFX10-NEXT:    v_cndmask_b32_e32 v10, 0x7fc00000, v50, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v11, v27
-; GFX10-NEXT:    v_cndmask_b32_e32 v11, 0x7fc00000, v51, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v12, v28
-; GFX10-NEXT:    v_cndmask_b32_e32 v12, 0x7fc00000, v52, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v13, v29
-; GFX10-NEXT:    v_cndmask_b32_e32 v13, 0x7fc00000, v53, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v14, v30
-; GFX10-NEXT:    v_cndmask_b32_e32 v14, 0x7fc00000, v54, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v16, v16
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v17, v17
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v18, v18
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v18, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v19, v19
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v19, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v20, v20
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v20, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v21, v21
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v21, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v22, v22
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v22, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v23, v23
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v23, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v24, v24
+; GFX10-NEXT:    v_cndmask_b32_e32 v8, v8, v24, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v25, v25
+; GFX10-NEXT:    v_cndmask_b32_e32 v9, v9, v25, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v26, v26
+; GFX10-NEXT:    v_cndmask_b32_e32 v10, v10, v26, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v27, v27
+; GFX10-NEXT:    v_cndmask_b32_e32 v11, v11, v27, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v28, v28
+; GFX10-NEXT:    v_cndmask_b32_e32 v12, v12, v28, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v29, v29
+; GFX10-NEXT:    v_cndmask_b32_e32 v13, v13, v29, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v30, v30
+; GFX10-NEXT:    v_cndmask_b32_e32 v14, v14, v30, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v16, v16, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v16
+; GFX10-NEXT:    v_cndmask_b32_e32 v17, v17, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_max_f32_e32 v1, v1, v17
+; GFX10-NEXT:    v_cndmask_b32_e32 v18, v18, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_max_f32_e32 v2, v2, v18
+; GFX10-NEXT:    v_cndmask_b32_e32 v19, v19, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_max_f32_e32 v3, v3, v19
+; GFX10-NEXT:    v_cndmask_b32_e32 v20, v20, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_max_f32_e32 v4, v4, v20
+; GFX10-NEXT:    v_cndmask_b32_e32 v21, v21, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_max_f32_e32 v5, v5, v21
+; GFX10-NEXT:    v_cndmask_b32_e32 v22, v22, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_max_f32_e32 v6, v6, v22
+; GFX10-NEXT:    v_cndmask_b32_e32 v23, v23, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v8, v8
+; GFX10-NEXT:    v_max_f32_e32 v7, v7, v23
+; GFX10-NEXT:    v_cndmask_b32_e32 v24, v24, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v9, v9
+; GFX10-NEXT:    v_max_f32_e32 v8, v8, v24
+; GFX10-NEXT:    v_cndmask_b32_e32 v25, v25, v9, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v10, v10
+; GFX10-NEXT:    v_max_f32_e32 v9, v9, v25
+; GFX10-NEXT:    v_cndmask_b32_e32 v26, v26, v10, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v11, v11
+; GFX10-NEXT:    v_max_f32_e32 v10, v10, v26
+; GFX10-NEXT:    v_cndmask_b32_e32 v27, v27, v11, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v12, v12
+; GFX10-NEXT:    v_max_f32_e32 v11, v11, v27
+; GFX10-NEXT:    v_cndmask_b32_e32 v28, v28, v12, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v13, v13
+; GFX10-NEXT:    v_max_f32_e32 v12, v12, v28
+; GFX10-NEXT:    v_cndmask_b32_e32 v29, v29, v13, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v14, v14
+; GFX10-NEXT:    v_max_f32_e32 v13, v13, v29
+; GFX10-NEXT:    v_cndmask_b32_e32 v30, v30, v14, vcc_lo
+; GFX10-NEXT:    v_max_f32_e32 v14, v14, v30
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_max_f32_e32 v16, v15, v31
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v15, v31
-; GFX10-NEXT:    v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v31, v31
+; GFX10-NEXT:    v_cndmask_b32_e32 v15, v15, v31, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v15, v15
+; GFX10-NEXT:    v_cndmask_b32_e32 v16, v31, v15, vcc_lo
+; GFX10-NEXT:    v_max_f32_e32 v15, v15, v16
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_v16f32:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    scratch_load_b32 v31, off, s32
-; GFX11-NEXT:    v_dual_max_f32 v32, v0, v16 :: v_dual_max_f32 v33, v1, v17
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v16
-; GFX11-NEXT:    v_dual_max_f32 v34, v2, v18 :: v_dual_max_f32 v35, v3, v19
-; GFX11-NEXT:    v_dual_max_f32 v36, v4, v20 :: v_dual_max_f32 v37, v5, v21
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v32, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v17
-; GFX11-NEXT:    v_max_f32_e32 v54, v14, v30
-; GFX11-NEXT:    v_dual_max_f32 v38, v6, v22 :: v_dual_max_f32 v39, v7, v23
-; GFX11-NEXT:    v_dual_max_f32 v48, v8, v24 :: v_dual_max_f32 v49, v9, v25
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v33, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v18
-; GFX11-NEXT:    v_dual_max_f32 v50, v10, v26 :: v_dual_max_f32 v51, v11, v27
-; GFX11-NEXT:    v_dual_max_f32 v52, v12, v28 :: v_dual_max_f32 v53, v13, v29
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v34, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v3, v19
-; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7fc00000, v35, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v4, v20
-; GFX11-NEXT:    v_cndmask_b32_e32 v4, 0x7fc00000, v36, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v5, v21
-; GFX11-NEXT:    v_cndmask_b32_e32 v5, 0x7fc00000, v37, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v6, v22
-; GFX11-NEXT:    v_cndmask_b32_e32 v6, 0x7fc00000, v38, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v7, v23
-; GFX11-NEXT:    v_cndmask_b32_e32 v7, 0x7fc00000, v39, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v8, v24
-; GFX11-NEXT:    v_cndmask_b32_e32 v8, 0x7fc00000, v48, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v9, v25
-; GFX11-NEXT:    v_cndmask_b32_e32 v9, 0x7fc00000, v49, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v10, v26
-; GFX11-NEXT:    v_cndmask_b32_e32 v10, 0x7fc00000, v50, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v11, v27
-; GFX11-NEXT:    v_cndmask_b32_e32 v11, 0x7fc00000, v51, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v12, v28
-; GFX11-NEXT:    v_cndmask_b32_e32 v12, 0x7fc00000, v52, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v13, v29
-; GFX11-NEXT:    v_cndmask_b32_e32 v13, 0x7fc00000, v53, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v14, v30
-; GFX11-NEXT:    v_cndmask_b32_e32 v14, 0x7fc00000, v54, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v16, v16
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v17, v17
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v18, v18
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v18, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v19, v19
+; GFX11-NEXT:    v_cndmask_b32_e32 v3, v3, v19, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v20, v20
+; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v20, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v21, v21
+; GFX11-NEXT:    v_cndmask_b32_e32 v5, v5, v21, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v22, v22
+; GFX11-NEXT:    v_cndmask_b32_e32 v6, v6, v22, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v23, v23
+; GFX11-NEXT:    v_cndmask_b32_e32 v7, v7, v23, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v24, v24
+; GFX11-NEXT:    v_cndmask_b32_e32 v8, v8, v24, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v25, v25
+; GFX11-NEXT:    v_cndmask_b32_e32 v9, v9, v25, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v26, v26
+; GFX11-NEXT:    v_cndmask_b32_e32 v10, v10, v26, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v27, v27
+; GFX11-NEXT:    v_cndmask_b32_e32 v11, v11, v27, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v28, v28
+; GFX11-NEXT:    v_cndmask_b32_e32 v12, v12, v28, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v29, v29
+; GFX11-NEXT:    v_cndmask_b32_e32 v13, v13, v29, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v30, v30
+; GFX11-NEXT:    v_cndmask_b32_e32 v14, v14, v30, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v16, v16, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v17, v17, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_dual_max_f32 v1, v1, v17 :: v_dual_cndmask_b32 v18, v18, v2
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX11-NEXT:    v_cndmask_b32_e32 v19, v19, v3, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX11-NEXT:    v_dual_max_f32 v3, v3, v19 :: v_dual_cndmask_b32 v20, v20, v4
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX11-NEXT:    v_cndmask_b32_e32 v21, v21, v5, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v6, v6
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_dual_max_f32 v0, v0, v16 :: v_dual_max_f32 v5, v5, v21
+; GFX11-NEXT:    v_cndmask_b32_e32 v22, v22, v6, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v7, v7
+; GFX11-NEXT:    v_cndmask_b32_e32 v23, v23, v7, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v8, v8
+; GFX11-NEXT:    v_dual_max_f32 v4, v4, v20 :: v_dual_max_f32 v7, v7, v23
+; GFX11-NEXT:    v_cndmask_b32_e32 v24, v24, v8, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v9, v9
+; GFX11-NEXT:    v_cndmask_b32_e32 v25, v25, v9, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v10, v10
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_dual_max_f32 v6, v6, v22 :: v_dual_max_f32 v9, v9, v25
+; GFX11-NEXT:    v_cndmask_b32_e32 v26, v26, v10, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v11, v11
+; GFX11-NEXT:    v_cndmask_b32_e32 v27, v27, v11, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v12, v12
+; GFX11-NEXT:    v_dual_max_f32 v8, v8, v24 :: v_dual_max_f32 v11, v11, v27
+; GFX11-NEXT:    v_cndmask_b32_e32 v28, v28, v12, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v13, v13
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_dual_max_f32 v12, v12, v28 :: v_dual_cndmask_b32 v29, v29, v13
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v14, v14
+; GFX11-NEXT:    v_dual_max_f32 v10, v10, v26 :: v_dual_max_f32 v13, v13, v29
+; GFX11-NEXT:    v_cndmask_b32_e32 v30, v30, v14, vcc_lo
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_max_f32_e32 v16, v15, v31
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v15, v31
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v31, v31
+; GFX11-NEXT:    v_dual_max_f32 v2, v2, v18 :: v_dual_cndmask_b32 v15, v15, v31
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v15, v15
+; GFX11-NEXT:    v_cndmask_b32_e32 v16, v31, v15, vcc_lo
+; GFX11-NEXT:    v_dual_max_f32 v14, v14, v30 :: v_dual_max_f32 v15, v15, v16
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v16f32:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
index 3280d7aa9ddfe..dff5ab548aff1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
@@ -12,61 +12,74 @@ define double @v_maximum_f64(double %src0, double %src1) {
 ; GFX7-LABEL: v_maximum_f64:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX7-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_f64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_f64:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX900-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX900-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_f64:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX950-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX950-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_f64:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_f64:
@@ -130,61 +143,74 @@ define double @v_maximum_f64__nsz(double %src0, double %src1) {
 ; GFX7-LABEL: v_maximum_f64__nsz:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX7-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_f64__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_f64__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX900-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX900-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_f64__nsz:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX950-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX950-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_f64__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_f64__nsz:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_f64__nsz:
@@ -249,67 +275,80 @@ define double @v_maximum_f64__nnan_src0(double %arg0, double %src1) {
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
-; GFX7-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX7-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_f64__nnan_src0:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
-; GFX8-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_f64__nnan_src0:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
-; GFX900-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX900-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX900-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_f64__nnan_src0:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
-; GFX950-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX950-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX950-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_f64__nnan_src0:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
-; GFX10-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_f64__nnan_src0:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_f64__nnan_src0:
@@ -333,67 +372,61 @@ define double @v_maximum_f64__nnan_src1(double %src0, double %arg1) {
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
-; GFX7-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX7-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_f64__nnan_src1:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
-; GFX8-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_f64__nnan_src1:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
-; GFX900-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX900-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX900-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_f64__nnan_src1:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
-; GFX950-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX950-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX950-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_f64__nnan_src1:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
-; GFX10-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_f64__nnan_src1:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_f64__nnan_src1:
@@ -416,13 +449,17 @@ define void @s_maximum_f64(double inreg %src0, double inreg %src1) {
 ; GFX7-LABEL: s_maximum_f64:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_mov_b32_e32 v0, s18
-; GFX7-NEXT:    v_mov_b32_e32 v1, s19
-; GFX7-NEXT:    v_max_f64 v[2:3], s[16:17], v[0:1]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, s[16:17], v[0:1]
-; GFX7-NEXT:    v_mov_b32_e32 v4, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], s[18:19], s[18:19]
+; GFX7-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GFX7-NEXT:    s_cselect_b32 s5, s19, s17
+; GFX7-NEXT:    s_cselect_b32 s4, s18, s16
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], s[4:5], s[4:5]
+; GFX7-NEXT:    s_and_b64 s[6:7], s[6:7], exec
+; GFX7-NEXT:    s_cselect_b32 s6, s5, s19
+; GFX7-NEXT:    s_cselect_b32 s7, s4, s18
+; GFX7-NEXT:    v_mov_b32_e32 v0, s7
+; GFX7-NEXT:    v_mov_b32_e32 v1, s6
+; GFX7-NEXT:    v_max_f64 v[0:1], s[4:5], v[0:1]
 ; GFX7-NEXT:    ;;#ASMSTART
 ; GFX7-NEXT:    ; use v[0:1]
 ; GFX7-NEXT:    ;;#ASMEND
@@ -431,13 +468,17 @@ define void @s_maximum_f64(double inreg %src0, double inreg %src1) {
 ; GFX8-LABEL: s_maximum_f64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, s18
-; GFX8-NEXT:    v_mov_b32_e32 v1, s19
-; GFX8-NEXT:    v_max_f64 v[2:3], s[16:17], v[0:1]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, s[16:17], v[0:1]
-; GFX8-NEXT:    v_mov_b32_e32 v4, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], s[18:19], s[18:19]
+; GFX8-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GFX8-NEXT:    s_cselect_b32 s5, s19, s17
+; GFX8-NEXT:    s_cselect_b32 s4, s18, s16
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], s[4:5], s[4:5]
+; GFX8-NEXT:    s_and_b64 s[6:7], s[6:7], exec
+; GFX8-NEXT:    s_cselect_b32 s6, s5, s19
+; GFX8-NEXT:    s_cselect_b32 s7, s4, s18
+; GFX8-NEXT:    v_mov_b32_e32 v0, s7
+; GFX8-NEXT:    v_mov_b32_e32 v1, s6
+; GFX8-NEXT:    v_max_f64 v[0:1], s[4:5], v[0:1]
 ; GFX8-NEXT:    ;;#ASMSTART
 ; GFX8-NEXT:    ; use v[0:1]
 ; GFX8-NEXT:    ;;#ASMEND
@@ -446,13 +487,17 @@ define void @s_maximum_f64(double inreg %src0, double inreg %src1) {
 ; GFX900-LABEL: s_maximum_f64:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_mov_b32_e32 v0, s18
-; GFX900-NEXT:    v_mov_b32_e32 v1, s19
-; GFX900-NEXT:    v_max_f64 v[2:3], s[16:17], v[0:1]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, s[16:17], v[0:1]
-; GFX900-NEXT:    v_mov_b32_e32 v4, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], s[18:19], s[18:19]
+; GFX900-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GFX900-NEXT:    s_cselect_b32 s5, s19, s17
+; GFX900-NEXT:    s_cselect_b32 s4, s18, s16
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], s[4:5], s[4:5]
+; GFX900-NEXT:    s_and_b64 s[6:7], s[6:7], exec
+; GFX900-NEXT:    s_cselect_b32 s6, s5, s19
+; GFX900-NEXT:    s_cselect_b32 s7, s4, s18
+; GFX900-NEXT:    v_mov_b32_e32 v0, s7
+; GFX900-NEXT:    v_mov_b32_e32 v1, s6
+; GFX900-NEXT:    v_max_f64 v[0:1], s[4:5], v[0:1]
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; use v[0:1]
 ; GFX900-NEXT:    ;;#ASMEND
@@ -461,13 +506,17 @@ define void @s_maximum_f64(double inreg %src0, double inreg %src1) {
 ; GFX950-LABEL: s_maximum_f64:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[2:3]
-; GFX950-NEXT:    v_max_f64 v[2:3], s[0:1], v[0:1]
-; GFX950-NEXT:    v_mov_b32_e32 v4, 0x7ff80000
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
-; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e64 s[4:5], s[2:3], s[2:3]
+; GFX950-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GFX950-NEXT:    s_cselect_b32 s1, s3, s1
+; GFX950-NEXT:    s_cselect_b32 s0, s2, s0
+; GFX950-NEXT:    v_cmp_u_f64_e64 s[4:5], s[0:1], s[0:1]
+; GFX950-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GFX950-NEXT:    s_cselect_b32 s3, s1, s3
+; GFX950-NEXT:    s_cselect_b32 s2, s0, s2
+; GFX950-NEXT:    v_mov_b32_e32 v0, s2
+; GFX950-NEXT:    v_mov_b32_e32 v1, s3
+; GFX950-NEXT:    v_max_f64 v[0:1], s[0:1], v[0:1]
 ; GFX950-NEXT:    ;;#ASMSTART
 ; GFX950-NEXT:    ; use v[0:1]
 ; GFX950-NEXT:    ;;#ASMEND
@@ -476,10 +525,15 @@ define void @s_maximum_f64(double inreg %src0, double inreg %src1) {
 ; GFX10-LABEL: s_maximum_f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f64 v[0:1], s[16:17], s[18:19]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, s[16:17], s[18:19]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s4
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, s[18:19], s[18:19]
+; GFX10-NEXT:    s_and_b32 s4, s4, exec_lo
+; GFX10-NEXT:    s_cselect_b32 s5, s19, s17
+; GFX10-NEXT:    s_cselect_b32 s4, s18, s16
+; GFX10-NEXT:    v_cmp_u_f64_e64 s6, s[4:5], s[4:5]
+; GFX10-NEXT:    s_and_b32 s6, s6, exec_lo
+; GFX10-NEXT:    s_cselect_b32 s7, s5, s19
+; GFX10-NEXT:    s_cselect_b32 s6, s4, s18
+; GFX10-NEXT:    v_max_f64 v[0:1], s[4:5], s[6:7]
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ; use v[0:1]
 ; GFX10-NEXT:    ;;#ASMEND
@@ -488,11 +542,16 @@ define void @s_maximum_f64(double inreg %src0, double inreg %src1) {
 ; GFX11-LABEL: s_maximum_f64:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s4, s[2:3], s[2:3]
+; GFX11-NEXT:    s_and_b32 s4, s4, exec_lo
+; GFX11-NEXT:    s_cselect_b32 s1, s3, s1
+; GFX11-NEXT:    s_cselect_b32 s0, s2, s0
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s4, s[0:1], s[0:1]
+; GFX11-NEXT:    s_and_b32 s4, s4, exec_lo
+; GFX11-NEXT:    s_cselect_b32 s3, s1, s3
+; GFX11-NEXT:    s_cselect_b32 s2, s0, s2
 ; GFX11-NEXT:    v_max_f64 v[0:1], s[0:1], s[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, s[0:1], s[2:3]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s0
 ; GFX11-NEXT:    ;;#ASMSTART
 ; GFX11-NEXT:    ; use v[0:1]
 ; GFX11-NEXT:    ;;#ASMEND
@@ -519,86 +578,120 @@ define <2 x double> @v_maximum_v2f64(<2 x double> %src0, <2 x double> %src1) {
 ; GFX7-LABEL: v_maximum_v2f64:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT:    v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX7-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[6:7], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s[4:5]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v6, v6, v2, s[4:5]
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v2f64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT:    v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[6:7], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s[4:5]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v6, v6, v2, s[4:5]
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX8-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v2f64:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX900-NEXT:    v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[6:7], v[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s[4:5]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX900-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v6, v6, v2, s[4:5]
+; GFX900-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX900-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v2f64:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX950-NEXT:    v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX950-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX950-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v9, v8, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v5, v8, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v7, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX950-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v2f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT:    v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[6:7]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v5, 0x7ff80000, s4
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[6:7], v[6:7]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s4
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v2, s4
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX10-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_v2f64:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT:    v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[6:7]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[6:7], v[6:7]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v5 :: v_dual_cndmask_b32 v0, v0, v4
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v5, v5, v1 :: v_dual_cndmask_b32 v4, v4, v0
+; GFX11-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v6, v6, v2, s0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v2f64:
@@ -669,86 +762,120 @@ define <2 x double> @v_maximum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1
 ; GFX7-LABEL: v_maximum_v2f64__nsz:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT:    v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX7-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[6:7], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s[4:5]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v6, v6, v2, s[4:5]
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v2f64__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT:    v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[6:7], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s[4:5]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v6, v6, v2, s[4:5]
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX8-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v2f64__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX900-NEXT:    v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[6:7], v[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s[4:5]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX900-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v6, v6, v2, s[4:5]
+; GFX900-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX900-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v2f64__nsz:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX950-NEXT:    v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX950-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX950-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v9, v8, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v5, v8, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v7, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX950-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v2f64__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT:    v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[6:7]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v5, 0x7ff80000, s4
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[6:7], v[6:7]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s4
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v2, s4
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX10-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_v2f64__nsz:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT:    v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[6:7]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[6:7], v[6:7]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v5 :: v_dual_cndmask_b32 v0, v0, v4
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v5, v5, v1 :: v_dual_cndmask_b32 v4, v4, v0
+; GFX11-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v6, v6, v2, s0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v2f64__nsz:
@@ -819,19 +946,28 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
 ; GFX7-LABEL: s_maximum_v2f64:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_mov_b32_e32 v0, s22
-; GFX7-NEXT:    v_mov_b32_e32 v1, s23
-; GFX7-NEXT:    v_max_f64 v[2:3], s[18:19], v[0:1]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, s[18:19], v[0:1]
-; GFX7-NEXT:    v_mov_b32_e32 v0, s20
-; GFX7-NEXT:    v_mov_b32_e32 v1, s21
-; GFX7-NEXT:    v_max_f64 v[4:5], s[16:17], v[0:1]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], s[16:17], v[0:1]
-; GFX7-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v1, v5, v6, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, s[4:5]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], s[22:23], s[22:23]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[8:9], s[20:21], s[20:21]
+; GFX7-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GFX7-NEXT:    s_cselect_b32 s5, s23, s19
+; GFX7-NEXT:    s_cselect_b32 s4, s22, s18
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], s[4:5], s[4:5]
+; GFX7-NEXT:    s_and_b64 s[6:7], s[6:7], exec
+; GFX7-NEXT:    s_cselect_b32 s10, s5, s23
+; GFX7-NEXT:    s_cselect_b32 s11, s4, s22
+; GFX7-NEXT:    s_and_b64 s[6:7], s[8:9], exec
+; GFX7-NEXT:    s_cselect_b32 s7, s21, s17
+; GFX7-NEXT:    s_cselect_b32 s6, s20, s16
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[8:9], s[6:7], s[6:7]
+; GFX7-NEXT:    v_mov_b32_e32 v0, s11
+; GFX7-NEXT:    v_mov_b32_e32 v1, s10
+; GFX7-NEXT:    v_max_f64 v[2:3], s[4:5], v[0:1]
+; GFX7-NEXT:    s_and_b64 s[4:5], s[8:9], exec
+; GFX7-NEXT:    s_cselect_b32 s4, s7, s21
+; GFX7-NEXT:    s_cselect_b32 s5, s6, s20
+; GFX7-NEXT:    v_mov_b32_e32 v0, s5
+; GFX7-NEXT:    v_mov_b32_e32 v1, s4
+; GFX7-NEXT:    v_max_f64 v[0:1], s[6:7], v[0:1]
 ; GFX7-NEXT:    ;;#ASMSTART
 ; GFX7-NEXT:    ; use v[0:3]
 ; GFX7-NEXT:    ;;#ASMEND
@@ -840,19 +976,28 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
 ; GFX8-LABEL: s_maximum_v2f64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, s22
-; GFX8-NEXT:    v_mov_b32_e32 v1, s23
-; GFX8-NEXT:    v_max_f64 v[2:3], s[18:19], v[0:1]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, s[18:19], v[0:1]
-; GFX8-NEXT:    v_mov_b32_e32 v0, s20
-; GFX8-NEXT:    v_mov_b32_e32 v1, s21
-; GFX8-NEXT:    v_max_f64 v[4:5], s[16:17], v[0:1]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], s[16:17], v[0:1]
-; GFX8-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, v5, v6, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, s[4:5]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], s[22:23], s[22:23]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[8:9], s[20:21], s[20:21]
+; GFX8-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GFX8-NEXT:    s_cselect_b32 s5, s23, s19
+; GFX8-NEXT:    s_cselect_b32 s4, s22, s18
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], s[4:5], s[4:5]
+; GFX8-NEXT:    s_and_b64 s[6:7], s[6:7], exec
+; GFX8-NEXT:    s_cselect_b32 s10, s5, s23
+; GFX8-NEXT:    s_cselect_b32 s11, s4, s22
+; GFX8-NEXT:    s_and_b64 s[6:7], s[8:9], exec
+; GFX8-NEXT:    s_cselect_b32 s7, s21, s17
+; GFX8-NEXT:    s_cselect_b32 s6, s20, s16
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[8:9], s[6:7], s[6:7]
+; GFX8-NEXT:    v_mov_b32_e32 v0, s11
+; GFX8-NEXT:    v_mov_b32_e32 v1, s10
+; GFX8-NEXT:    v_max_f64 v[2:3], s[4:5], v[0:1]
+; GFX8-NEXT:    s_and_b64 s[4:5], s[8:9], exec
+; GFX8-NEXT:    s_cselect_b32 s4, s7, s21
+; GFX8-NEXT:    s_cselect_b32 s5, s6, s20
+; GFX8-NEXT:    v_mov_b32_e32 v0, s5
+; GFX8-NEXT:    v_mov_b32_e32 v1, s4
+; GFX8-NEXT:    v_max_f64 v[0:1], s[6:7], v[0:1]
 ; GFX8-NEXT:    ;;#ASMSTART
 ; GFX8-NEXT:    ; use v[0:3]
 ; GFX8-NEXT:    ;;#ASMEND
@@ -861,19 +1006,28 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
 ; GFX900-LABEL: s_maximum_v2f64:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_mov_b32_e32 v0, s22
-; GFX900-NEXT:    v_mov_b32_e32 v1, s23
-; GFX900-NEXT:    v_max_f64 v[2:3], s[18:19], v[0:1]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, s[18:19], v[0:1]
-; GFX900-NEXT:    v_mov_b32_e32 v0, s20
-; GFX900-NEXT:    v_mov_b32_e32 v1, s21
-; GFX900-NEXT:    v_max_f64 v[4:5], s[16:17], v[0:1]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], s[16:17], v[0:1]
-; GFX900-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v1, v5, v6, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v4, 0, s[4:5]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], s[22:23], s[22:23]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[8:9], s[20:21], s[20:21]
+; GFX900-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GFX900-NEXT:    s_cselect_b32 s5, s23, s19
+; GFX900-NEXT:    s_cselect_b32 s4, s22, s18
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], s[4:5], s[4:5]
+; GFX900-NEXT:    s_and_b64 s[6:7], s[6:7], exec
+; GFX900-NEXT:    s_cselect_b32 s10, s5, s23
+; GFX900-NEXT:    s_cselect_b32 s11, s4, s22
+; GFX900-NEXT:    s_and_b64 s[6:7], s[8:9], exec
+; GFX900-NEXT:    s_cselect_b32 s7, s21, s17
+; GFX900-NEXT:    s_cselect_b32 s6, s20, s16
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[8:9], s[6:7], s[6:7]
+; GFX900-NEXT:    v_mov_b32_e32 v0, s11
+; GFX900-NEXT:    v_mov_b32_e32 v1, s10
+; GFX900-NEXT:    v_max_f64 v[2:3], s[4:5], v[0:1]
+; GFX900-NEXT:    s_and_b64 s[4:5], s[8:9], exec
+; GFX900-NEXT:    s_cselect_b32 s4, s7, s21
+; GFX900-NEXT:    s_cselect_b32 s5, s6, s20
+; GFX900-NEXT:    v_mov_b32_e32 v0, s5
+; GFX900-NEXT:    v_mov_b32_e32 v1, s4
+; GFX900-NEXT:    v_max_f64 v[0:1], s[6:7], v[0:1]
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; use v[0:3]
 ; GFX900-NEXT:    ;;#ASMEND
@@ -882,18 +1036,28 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
 ; GFX950-LABEL: s_maximum_v2f64:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[18:19]
+; GFX950-NEXT:    v_cmp_u_f64_e64 s[4:5], s[18:19], s[18:19]
+; GFX950-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GFX950-NEXT:    s_cselect_b32 s3, s19, s3
+; GFX950-NEXT:    s_cselect_b32 s2, s18, s2
+; GFX950-NEXT:    v_cmp_u_f64_e64 s[4:5], s[2:3], s[2:3]
+; GFX950-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GFX950-NEXT:    s_cselect_b32 s4, s3, s19
+; GFX950-NEXT:    s_cselect_b32 s5, s2, s18
+; GFX950-NEXT:    v_mov_b32_e32 v0, s5
+; GFX950-NEXT:    v_mov_b32_e32 v1, s4
 ; GFX950-NEXT:    v_max_f64 v[2:3], s[2:3], v[0:1]
-; GFX950-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, s[2:3], v[0:1]
-; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[16:17]
-; GFX950-NEXT:    v_max_f64 v[4:5], s[0:1], v[0:1]
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
-; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e64 s[2:3], s[16:17], s[16:17]
+; GFX950-NEXT:    s_and_b64 s[2:3], s[2:3], exec
+; GFX950-NEXT:    s_cselect_b32 s1, s17, s1
+; GFX950-NEXT:    s_cselect_b32 s0, s16, s0
+; GFX950-NEXT:    v_cmp_u_f64_e64 s[2:3], s[0:1], s[0:1]
+; GFX950-NEXT:    s_and_b64 s[2:3], s[2:3], exec
+; GFX950-NEXT:    s_cselect_b32 s2, s1, s17
+; GFX950-NEXT:    s_cselect_b32 s3, s0, s16
+; GFX950-NEXT:    v_mov_b32_e32 v0, s3
+; GFX950-NEXT:    v_mov_b32_e32 v1, s2
+; GFX950-NEXT:    v_max_f64 v[0:1], s[0:1], v[0:1]
 ; GFX950-NEXT:    ;;#ASMSTART
 ; GFX950-NEXT:    ; use v[0:3]
 ; GFX950-NEXT:    ;;#ASMEND
@@ -902,14 +1066,24 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
 ; GFX10-LABEL: s_maximum_v2f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f64 v[0:1], s[18:19], s[22:23]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, s[18:19], s[22:23]
-; GFX10-NEXT:    v_max_f64 v[4:5], s[16:17], s[20:21]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, s[16:17], s[20:21]
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v1, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v0, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, 0, s5
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, s[22:23], s[22:23]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s8, s[20:21], s[20:21]
+; GFX10-NEXT:    s_and_b32 s4, s4, exec_lo
+; GFX10-NEXT:    s_cselect_b32 s5, s23, s19
+; GFX10-NEXT:    s_cselect_b32 s4, s22, s18
+; GFX10-NEXT:    v_cmp_u_f64_e64 s6, s[4:5], s[4:5]
+; GFX10-NEXT:    s_and_b32 s6, s6, exec_lo
+; GFX10-NEXT:    s_cselect_b32 s7, s5, s23
+; GFX10-NEXT:    s_cselect_b32 s6, s4, s22
+; GFX10-NEXT:    s_and_b32 s8, s8, exec_lo
+; GFX10-NEXT:    s_cselect_b32 s9, s21, s17
+; GFX10-NEXT:    s_cselect_b32 s8, s20, s16
+; GFX10-NEXT:    v_max_f64 v[2:3], s[4:5], s[6:7]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s10, s[8:9], s[8:9]
+; GFX10-NEXT:    s_and_b32 s10, s10, exec_lo
+; GFX10-NEXT:    s_cselect_b32 s11, s9, s21
+; GFX10-NEXT:    s_cselect_b32 s10, s8, s20
+; GFX10-NEXT:    v_max_f64 v[0:1], s[8:9], s[10:11]
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ; use v[0:3]
 ; GFX10-NEXT:    ;;#ASMEND
@@ -918,15 +1092,26 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
 ; GFX11-LABEL: s_maximum_v2f64:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f64 v[0:1], s[2:3], s[18:19]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s2, s[2:3], s[18:19]
-; GFX11-NEXT:    v_max_f64 v[4:5], s[0:1], s[16:17]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, s[0:1], s[16:17]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v1, 0x7ff80000, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v0, 0, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, s0
+; GFX11-NEXT:    v_cmp_u_f64_e64 s4, s[18:19], s[18:19]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s6, s[16:17], s[16:17]
+; GFX11-NEXT:    s_and_b32 s4, s4, exec_lo
+; GFX11-NEXT:    s_cselect_b32 s3, s19, s3
+; GFX11-NEXT:    s_cselect_b32 s2, s18, s2
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s4, s[2:3], s[2:3]
+; GFX11-NEXT:    s_and_b32 s4, s4, exec_lo
+; GFX11-NEXT:    s_cselect_b32 s5, s3, s19
+; GFX11-NEXT:    s_cselect_b32 s4, s2, s18
+; GFX11-NEXT:    s_and_b32 s6, s6, exec_lo
+; GFX11-NEXT:    s_cselect_b32 s1, s17, s1
+; GFX11-NEXT:    s_cselect_b32 s0, s16, s0
+; GFX11-NEXT:    v_max_f64 v[2:3], s[2:3], s[4:5]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s6, s[0:1], s[0:1]
+; GFX11-NEXT:    s_and_b32 s6, s6, exec_lo
+; GFX11-NEXT:    s_cselect_b32 s7, s1, s17
+; GFX11-NEXT:    s_cselect_b32 s6, s0, s16
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    v_max_f64 v[0:1], s[0:1], s[6:7]
 ; GFX11-NEXT:    ;;#ASMSTART
 ; GFX11-NEXT:    ; use v[0:3]
 ; GFX11-NEXT:    ;;#ASMEND
@@ -954,110 +1139,165 @@ define <3 x double> @v_maximum_v3f64(<3 x double> %src0, <3 x double> %src1) {
 ; GFX7-LABEL: v_maximum_v3f64:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT:    v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX7-NEXT:    v_max_f64 v[8:9], v[4:5], v[10:11]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[8:9], v[8:9]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[10:11], v[10:11]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, v2, v8, s[4:5]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[6:7]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v7, v9, v3, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v6, v8, v2, s[4:5]
+; GFX7-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v7, v11, v5, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v6, v10, v4, s[6:7]
+; GFX7-NEXT:    v_max_f64 v[4:5], v[4:5], v[6:7]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v3f64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT:    v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX8-NEXT:    v_max_f64 v[8:9], v[4:5], v[10:11]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[8:9], v[8:9]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[10:11], v[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, v8, s[4:5]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[6:7]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v7, v9, v3, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v6, v8, v2, s[4:5]
+; GFX8-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v7, v11, v5, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v6, v10, v4, s[6:7]
+; GFX8-NEXT:    v_max_f64 v[4:5], v[4:5], v[6:7]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v3f64:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX900-NEXT:    v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX900-NEXT:    v_max_f64 v[8:9], v[4:5], v[10:11]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[8:9], v[8:9]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[10:11], v[10:11]
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, v2, v8, s[4:5]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX900-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[6:7]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
+; GFX900-NEXT:    v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v7, v9, v3, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v6, v8, v2, s[4:5]
+; GFX900-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v7, v11, v5, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v6, v10, v4, s[6:7]
+; GFX900-NEXT:    v_max_f64 v[4:5], v[4:5], v[6:7]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v3f64:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX950-NEXT:    v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX950-NEXT:    v_max_f64 v[0:1], v[0:1], v[6:7]
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX950-NEXT:    v_mov_b32_e32 v12, 0x7ff80000
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v13, v12, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v7, v12, vcc
-; GFX950-NEXT:    v_max_f64 v[6:7], v[4:5], v[10:11]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v9, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, v8, v2, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX950-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v5, v11, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v4, v6, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v5, v7, v12, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v11, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, v10, v4, vcc
+; GFX950-NEXT:    v_max_f64 v[4:5], v[4:5], v[6:7]
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v3f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT:    v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[8:9]
-; GFX10-NEXT:    v_max_f64 v[8:9], v[4:5], v[10:11]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[10:11]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v7, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v9, 0x7ff80000, s5
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[6:7], v[6:7]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[8:9], v[8:9]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[10:11], v[10:11]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v8, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s5
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[4:5]
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v9, v9, v3, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, v2, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, v11, v5, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, v10, v4, s5
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX10-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX10-NEXT:    v_max_f64 v[4:5], v[4:5], v[10:11]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_v3f64:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT:    v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[8:9]
-; GFX11-NEXT:    v_max_f64 v[8:9], v[4:5], v[10:11]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[10:11]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v7, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[6:7], v[6:7]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[8:9], v[8:9]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[10:11], v[10:11]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v7 :: v_dual_cndmask_b32 v0, v0, v6
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, v8, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s1
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[2:3]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[4:5]
+; GFX11-NEXT:    v_dual_cndmask_b32 v7, v7, v1 :: v_dual_cndmask_b32 v6, v6, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cndmask_b32_e64 v9, v9, v3, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v8, v8, v2, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v11, v11, v5, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v10, v10, v4, s1
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX11-NEXT:    v_max_f64 v[4:5], v[4:5], v[10:11]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v3f64:
@@ -1135,110 +1375,165 @@ define <3 x double> @v_maximum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1
 ; GFX7-LABEL: v_maximum_v3f64__nsz:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT:    v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX7-NEXT:    v_max_f64 v[8:9], v[4:5], v[10:11]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[8:9], v[8:9]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[10:11], v[10:11]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, v2, v8, s[4:5]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[6:7]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v7, v9, v3, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v6, v8, v2, s[4:5]
+; GFX7-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v7, v11, v5, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v6, v10, v4, s[6:7]
+; GFX7-NEXT:    v_max_f64 v[4:5], v[4:5], v[6:7]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v3f64__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT:    v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX8-NEXT:    v_max_f64 v[8:9], v[4:5], v[10:11]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[8:9], v[8:9]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[10:11], v[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, v8, s[4:5]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[6:7]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v7, v9, v3, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v6, v8, v2, s[4:5]
+; GFX8-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v7, v11, v5, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v6, v10, v4, s[6:7]
+; GFX8-NEXT:    v_max_f64 v[4:5], v[4:5], v[6:7]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v3f64__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX900-NEXT:    v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX900-NEXT:    v_max_f64 v[8:9], v[4:5], v[10:11]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[8:9], v[8:9]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[10:11], v[10:11]
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, v2, v8, s[4:5]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX900-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[6:7]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
+; GFX900-NEXT:    v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v7, v9, v3, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v6, v8, v2, s[4:5]
+; GFX900-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v7, v11, v5, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v6, v10, v4, s[6:7]
+; GFX900-NEXT:    v_max_f64 v[4:5], v[4:5], v[6:7]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v3f64__nsz:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX950-NEXT:    v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX950-NEXT:    v_max_f64 v[0:1], v[0:1], v[6:7]
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX950-NEXT:    v_mov_b32_e32 v12, 0x7ff80000
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v13, v12, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v7, v12, vcc
-; GFX950-NEXT:    v_max_f64 v[6:7], v[4:5], v[10:11]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v9, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, v8, v2, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX950-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v5, v11, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v4, v6, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v5, v7, v12, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v11, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, v10, v4, vcc
+; GFX950-NEXT:    v_max_f64 v[4:5], v[4:5], v[6:7]
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v3f64__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT:    v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[8:9]
-; GFX10-NEXT:    v_max_f64 v[8:9], v[4:5], v[10:11]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[10:11]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v7, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v9, 0x7ff80000, s5
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[6:7], v[6:7]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[8:9], v[8:9]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[10:11], v[10:11]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v8, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s5
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[4:5]
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v9, v9, v3, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, v2, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, v11, v5, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, v10, v4, s5
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX10-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX10-NEXT:    v_max_f64 v[4:5], v[4:5], v[10:11]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_v3f64__nsz:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT:    v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[8:9]
-; GFX11-NEXT:    v_max_f64 v[8:9], v[4:5], v[10:11]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[10:11]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v7, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[6:7], v[6:7]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[8:9], v[8:9]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[10:11], v[10:11]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v7 :: v_dual_cndmask_b32 v0, v0, v6
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, v8, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s1
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[2:3]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[4:5]
+; GFX11-NEXT:    v_dual_cndmask_b32 v7, v7, v1 :: v_dual_cndmask_b32 v6, v6, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cndmask_b32_e64 v9, v9, v3, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v8, v8, v2, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v11, v11, v5, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v10, v10, v4, s1
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX11-NEXT:    v_max_f64 v[4:5], v[4:5], v[10:11]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v3f64__nsz:
@@ -1316,135 +1611,209 @@ define <4 x double> @v_maximum_v4f64(<4 x double> %src0, <4 x double> %src1) {
 ; GFX7-LABEL: v_maximum_v4f64:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX7-NEXT:    v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX7-NEXT:    v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX7-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[10:11], v[10:11]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[12:13], v[12:13]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s[4:5]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v4, v4, v12, s[6:7]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e64 v9, v11, v3, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v8, v10, v2, s[4:5]
+; GFX7-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e64 v9, v13, v5, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v8, v12, v4, s[6:7]
+; GFX7-NEXT:    v_max_f64 v[4:5], v[4:5], v[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v9, v15, v7, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX7-NEXT:    v_max_f64 v[6:7], v[6:7], v[8:9]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v4f64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX8-NEXT:    v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX8-NEXT:    v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX8-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[10:11], v[10:11]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[12:13], v[12:13]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s[4:5]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, v4, v12, s[6:7]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e64 v9, v11, v3, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v8, v10, v2, s[4:5]
+; GFX8-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e64 v9, v13, v5, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v8, v12, v4, s[6:7]
+; GFX8-NEXT:    v_max_f64 v[4:5], v[4:5], v[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v9, v15, v7, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX8-NEXT:    v_max_f64 v[6:7], v[6:7], v[8:9]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v4f64:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX900-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX900-NEXT:    v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX900-NEXT:    v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX900-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX900-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[10:11], v[10:11]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[12:13], v[12:13]
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s[4:5]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX900-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v4, v4, v12, s[6:7]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX900-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e64 v9, v11, v3, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v8, v10, v2, s[4:5]
+; GFX900-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e64 v9, v13, v5, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v8, v12, v4, s[6:7]
+; GFX900-NEXT:    v_max_f64 v[4:5], v[4:5], v[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v15, v7, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX900-NEXT:    v_max_f64 v[6:7], v[6:7], v[8:9]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v4f64:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX950-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX950-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX950-NEXT:    v_mov_b32_e32 v16, 0x7ff80000
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v17, v16, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v8, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v9, v16, vcc
-; GFX950-NEXT:    v_max_f64 v[8:9], v[4:5], v[12:13]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v11, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v10, v2, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX950-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v5, v13, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v4, v8, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v5, v9, v16, vcc
-; GFX950-NEXT:    v_max_f64 v[8:9], v[6:7], v[14:15]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v13, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v12, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX950-NEXT:    v_max_f64 v[4:5], v[4:5], v[8:9]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v7, v9, v16, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v15, v7, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX950-NEXT:    v_max_f64 v[6:7], v[6:7], v[8:9]
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v4f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[10:11]
-; GFX10-NEXT:    v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[12:13]
-; GFX10-NEXT:    v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[6:7], v[14:15]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v9, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v11, 0x7ff80000, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s6
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, v13, 0x7ff80000, s6
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[8:9], v[8:9]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[10:11], v[10:11]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[12:13], v[12:13]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[14:15], v[14:15]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s4
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, v15, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, v4, v12, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v14, s6
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[4:5]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[6:7], v[6:7]
+; GFX10-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, v11, v3, s4
+; GFX10-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v13, v13, v5, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, v10, v2, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v15, v15, v7, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v12, v12, v4, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v14, v14, v6, s6
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX10-NEXT:    v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX10-NEXT:    v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX10-NEXT:    v_max_f64 v[6:7], v[6:7], v[14:15]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_v4f64:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[10:11]
-; GFX11-NEXT:    v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[12:13]
-; GFX11-NEXT:    v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[6:7], v[14:15]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v9, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, v11, 0x7ff80000, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[8:9], v[8:9]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[10:11], v[10:11]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[12:13], v[12:13]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[14:15], v[14:15]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v9 :: v_dual_cndmask_b32 v0, v0, v8
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v7, v7, v15, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, v4, v12, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v6, v6, v14, s2
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[4:5]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[6:7], v[6:7]
+; GFX11-NEXT:    v_dual_cndmask_b32 v9, v9, v1 :: v_dual_cndmask_b32 v8, v8, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cndmask_b32_e64 v11, v11, v3, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v13, v13, v5, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v10, v10, v2, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v15, v15, v7, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v12, v12, v4, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v14, v14, v6, s2
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX11-NEXT:    v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX11-NEXT:    v_max_f64 v[6:7], v[6:7], v[14:15]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v4f64:
@@ -1529,135 +1898,209 @@ define <4 x double> @v_maximum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1
 ; GFX7-LABEL: v_maximum_v4f64__nsz:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX7-NEXT:    v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX7-NEXT:    v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX7-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[10:11], v[10:11]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[12:13], v[12:13]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s[4:5]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v4, v4, v12, s[6:7]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e64 v9, v11, v3, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v8, v10, v2, s[4:5]
+; GFX7-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e64 v9, v13, v5, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v8, v12, v4, s[6:7]
+; GFX7-NEXT:    v_max_f64 v[4:5], v[4:5], v[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v9, v15, v7, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX7-NEXT:    v_max_f64 v[6:7], v[6:7], v[8:9]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v4f64__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX8-NEXT:    v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX8-NEXT:    v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX8-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[10:11], v[10:11]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[12:13], v[12:13]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s[4:5]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, v4, v12, s[6:7]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e64 v9, v11, v3, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v8, v10, v2, s[4:5]
+; GFX8-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e64 v9, v13, v5, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v8, v12, v4, s[6:7]
+; GFX8-NEXT:    v_max_f64 v[4:5], v[4:5], v[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v9, v15, v7, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX8-NEXT:    v_max_f64 v[6:7], v[6:7], v[8:9]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v4f64__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX900-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX900-NEXT:    v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX900-NEXT:    v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX900-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX900-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[10:11], v[10:11]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[12:13], v[12:13]
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s[4:5]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX900-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v4, v4, v12, s[6:7]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX900-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e64 v9, v11, v3, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v8, v10, v2, s[4:5]
+; GFX900-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e64 v9, v13, v5, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v8, v12, v4, s[6:7]
+; GFX900-NEXT:    v_max_f64 v[4:5], v[4:5], v[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v15, v7, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX900-NEXT:    v_max_f64 v[6:7], v[6:7], v[8:9]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v4f64__nsz:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX950-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX950-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX950-NEXT:    v_mov_b32_e32 v16, 0x7ff80000
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v17, v16, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v8, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v9, v16, vcc
-; GFX950-NEXT:    v_max_f64 v[8:9], v[4:5], v[12:13]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v11, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v10, v2, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX950-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v5, v13, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v4, v8, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v5, v9, v16, vcc
-; GFX950-NEXT:    v_max_f64 v[8:9], v[6:7], v[14:15]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v13, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v12, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX950-NEXT:    v_max_f64 v[4:5], v[4:5], v[8:9]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v7, v9, v16, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v15, v7, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX950-NEXT:    v_max_f64 v[6:7], v[6:7], v[8:9]
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v4f64__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[10:11]
-; GFX10-NEXT:    v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[12:13]
-; GFX10-NEXT:    v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[6:7], v[14:15]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v9, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v11, 0x7ff80000, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s6
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, v13, 0x7ff80000, s6
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[8:9], v[8:9]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[10:11], v[10:11]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[12:13], v[12:13]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[14:15], v[14:15]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s4
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, v15, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, v4, v12, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v14, s6
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[4:5]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[6:7], v[6:7]
+; GFX10-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, v11, v3, s4
+; GFX10-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v13, v13, v5, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, v10, v2, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v15, v15, v7, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v12, v12, v4, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v14, v14, v6, s6
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX10-NEXT:    v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX10-NEXT:    v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX10-NEXT:    v_max_f64 v[6:7], v[6:7], v[14:15]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_v4f64__nsz:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[10:11]
-; GFX11-NEXT:    v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[12:13]
-; GFX11-NEXT:    v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[6:7], v[14:15]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v9, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, v11, 0x7ff80000, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[8:9], v[8:9]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[10:11], v[10:11]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[12:13], v[12:13]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[14:15], v[14:15]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v9 :: v_dual_cndmask_b32 v0, v0, v8
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v7, v7, v15, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, v4, v12, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v6, v6, v14, s2
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[4:5]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[6:7], v[6:7]
+; GFX11-NEXT:    v_dual_cndmask_b32 v9, v9, v1 :: v_dual_cndmask_b32 v8, v8, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cndmask_b32_e64 v11, v11, v3, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v13, v13, v5, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v10, v10, v2, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v15, v15, v7, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v12, v12, v4, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v14, v14, v6, s2
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX11-NEXT:    v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX11-NEXT:    v_max_f64 v[6:7], v[6:7], v[14:15]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v4f64__nsz:
@@ -1742,244 +2185,395 @@ define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) {
 ; GFX7-LABEL: v_maximum_v8f64:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[16:17]
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX7-NEXT:    v_max_f64 v[32:33], v[0:1], v[16:17]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX7-NEXT:    v_max_f64 v[16:17], v[2:3], v[18:19]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[18:19]
-; GFX7-NEXT:    v_mov_b32_e32 v34, 0x7ff80000
-; GFX7-NEXT:    v_max_f64 v[18:19], v[4:5], v[20:21]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[20:21]
-; GFX7-NEXT:    v_max_f64 v[20:21], v[6:7], v[22:23]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[22:23]
-; GFX7-NEXT:    v_max_f64 v[22:23], v[8:9], v[24:25]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
-; GFX7-NEXT:    v_max_f64 v[24:25], v[10:11], v[26:27]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
-; GFX7-NEXT:    v_max_f64 v[26:27], v[12:13], v[28:29]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v34, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v16, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v17, v34, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v18, 0, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v19, v34, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v6, v20, 0, s[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e64 v7, v21, v34, s[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e64 v8, v22, 0, s[10:11]
-; GFX7-NEXT:    v_cndmask_b32_e64 v9, v23, v34, s[10:11]
-; GFX7-NEXT:    v_cndmask_b32_e64 v10, v24, 0, s[12:13]
-; GFX7-NEXT:    v_cndmask_b32_e64 v11, v25, v34, s[12:13]
-; GFX7-NEXT:    v_cndmask_b32_e64 v12, v26, 0, s[14:15]
-; GFX7-NEXT:    v_cndmask_b32_e64 v13, v27, v34, s[14:15]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v17, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v16, v16, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[18:19]
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[16:17]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v19, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v18, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e32 v19, v19, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v18, v18, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[20:21]
+; GFX7-NEXT:    v_max_f64 v[2:3], v[2:3], v[18:19]
+; GFX7-NEXT:    v_cndmask_b32_e32 v5, v5, v21, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v4, v20, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v21, v21, v5, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v20, v20, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[22:23]
+; GFX7-NEXT:    v_max_f64 v[4:5], v[4:5], v[20:21]
+; GFX7-NEXT:    v_cndmask_b32_e32 v7, v7, v23, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v6, v6, v22, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v23, v23, v7, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v22, v22, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[24:25]
+; GFX7-NEXT:    v_max_f64 v[6:7], v[6:7], v[22:23]
+; GFX7-NEXT:    v_cndmask_b32_e32 v9, v9, v25, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v8, v24, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e32 v25, v25, v9, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v24, v24, v8, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[26:27]
+; GFX7-NEXT:    v_max_f64 v[8:9], v[8:9], v[24:25]
+; GFX7-NEXT:    v_cndmask_b32_e32 v11, v11, v27, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v10, v10, v26, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX7-NEXT:    v_cndmask_b32_e32 v27, v27, v11, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v26, v26, v10, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[28:29]
+; GFX7-NEXT:    v_max_f64 v[10:11], v[10:11], v[26:27]
+; GFX7-NEXT:    v_cndmask_b32_e32 v13, v13, v29, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v12, v12, v28, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX7-NEXT:    v_cndmask_b32_e32 v29, v29, v13, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v28, v28, v12, vcc
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_max_f64 v[16:17], v[14:15], v[30:31]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
-; GFX7-NEXT:    v_cndmask_b32_e64 v14, v16, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v15, v17, v34, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[30:31], v[30:31]
+; GFX7-NEXT:    v_max_f64 v[12:13], v[12:13], v[28:29]
+; GFX7-NEXT:    v_cndmask_b32_e32 v15, v15, v31, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v14, v14, v30, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v31, v15, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v16, v30, v14, vcc
+; GFX7-NEXT:    v_max_f64 v[14:15], v[14:15], v[16:17]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v8f64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[16:17]
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX8-NEXT:    v_max_f64 v[32:33], v[0:1], v[16:17]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX8-NEXT:    v_max_f64 v[16:17], v[2:3], v[18:19]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[18:19]
-; GFX8-NEXT:    v_mov_b32_e32 v34, 0x7ff80000
-; GFX8-NEXT:    v_max_f64 v[18:19], v[4:5], v[20:21]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[20:21]
-; GFX8-NEXT:    v_max_f64 v[20:21], v[6:7], v[22:23]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[22:23]
-; GFX8-NEXT:    v_max_f64 v[22:23], v[8:9], v[24:25]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
-; GFX8-NEXT:    v_max_f64 v[24:25], v[10:11], v[26:27]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
-; GFX8-NEXT:    v_max_f64 v[26:27], v[12:13], v[28:29]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v34, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v16, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v17, v34, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v18, 0, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v19, v34, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, v20, 0, s[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, v21, v34, s[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e64 v8, v22, 0, s[10:11]
-; GFX8-NEXT:    v_cndmask_b32_e64 v9, v23, v34, s[10:11]
-; GFX8-NEXT:    v_cndmask_b32_e64 v10, v24, 0, s[12:13]
-; GFX8-NEXT:    v_cndmask_b32_e64 v11, v25, v34, s[12:13]
-; GFX8-NEXT:    v_cndmask_b32_e64 v12, v26, 0, s[14:15]
-; GFX8-NEXT:    v_cndmask_b32_e64 v13, v27, v34, s[14:15]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v17, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v16, v16, v0, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[18:19]
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[16:17]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v19, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v18, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e32 v19, v19, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v18, v18, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[20:21]
+; GFX8-NEXT:    v_max_f64 v[2:3], v[2:3], v[18:19]
+; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v21, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v20, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v21, v21, v5, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v20, v20, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[22:23]
+; GFX8-NEXT:    v_max_f64 v[4:5], v[4:5], v[20:21]
+; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v23, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v22, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v23, v23, v7, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v22, v22, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[24:25]
+; GFX8-NEXT:    v_max_f64 v[6:7], v[6:7], v[22:23]
+; GFX8-NEXT:    v_cndmask_b32_e32 v9, v9, v25, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v8, v24, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e32 v25, v25, v9, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v24, v24, v8, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[26:27]
+; GFX8-NEXT:    v_max_f64 v[8:9], v[8:9], v[24:25]
+; GFX8-NEXT:    v_cndmask_b32_e32 v11, v11, v27, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v10, v10, v26, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e32 v27, v27, v11, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v26, v26, v10, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[28:29]
+; GFX8-NEXT:    v_max_f64 v[10:11], v[10:11], v[26:27]
+; GFX8-NEXT:    v_cndmask_b32_e32 v13, v13, v29, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v12, v12, v28, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX8-NEXT:    v_cndmask_b32_e32 v29, v29, v13, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v28, v28, v12, vcc
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_max_f64 v[16:17], v[14:15], v[30:31]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
-; GFX8-NEXT:    v_cndmask_b32_e64 v14, v16, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v15, v17, v34, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[30:31], v[30:31]
+; GFX8-NEXT:    v_max_f64 v[12:13], v[12:13], v[28:29]
+; GFX8-NEXT:    v_cndmask_b32_e32 v15, v15, v31, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v14, v14, v30, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v31, v15, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v16, v30, v14, vcc
+; GFX8-NEXT:    v_max_f64 v[14:15], v[14:15], v[16:17]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v8f64:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[16:17]
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX900-NEXT:    v_max_f64 v[32:33], v[0:1], v[16:17]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX900-NEXT:    v_max_f64 v[16:17], v[2:3], v[18:19]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[18:19]
-; GFX900-NEXT:    v_mov_b32_e32 v34, 0x7ff80000
-; GFX900-NEXT:    v_max_f64 v[18:19], v[4:5], v[20:21]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[20:21]
-; GFX900-NEXT:    v_max_f64 v[20:21], v[6:7], v[22:23]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[22:23]
-; GFX900-NEXT:    v_max_f64 v[22:23], v[8:9], v[24:25]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
-; GFX900-NEXT:    v_max_f64 v[24:25], v[10:11], v[26:27]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
-; GFX900-NEXT:    v_max_f64 v[26:27], v[12:13], v[28:29]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v33, v34, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v16, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v17, v34, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v18, 0, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v19, v34, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v6, v20, 0, s[8:9]
-; GFX900-NEXT:    v_cndmask_b32_e64 v7, v21, v34, s[8:9]
-; GFX900-NEXT:    v_cndmask_b32_e64 v8, v22, 0, s[10:11]
-; GFX900-NEXT:    v_cndmask_b32_e64 v9, v23, v34, s[10:11]
-; GFX900-NEXT:    v_cndmask_b32_e64 v10, v24, 0, s[12:13]
-; GFX900-NEXT:    v_cndmask_b32_e64 v11, v25, v34, s[12:13]
-; GFX900-NEXT:    v_cndmask_b32_e64 v12, v26, 0, s[14:15]
-; GFX900-NEXT:    v_cndmask_b32_e64 v13, v27, v34, s[14:15]
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v17, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v16, v16, v0, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[18:19]
+; GFX900-NEXT:    v_max_f64 v[0:1], v[0:1], v[16:17]
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v19, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v18, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX900-NEXT:    v_cndmask_b32_e32 v19, v19, v3, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v18, v18, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[20:21]
+; GFX900-NEXT:    v_max_f64 v[2:3], v[2:3], v[18:19]
+; GFX900-NEXT:    v_cndmask_b32_e32 v5, v5, v21, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v20, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e32 v21, v21, v5, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v20, v20, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[22:23]
+; GFX900-NEXT:    v_max_f64 v[4:5], v[4:5], v[20:21]
+; GFX900-NEXT:    v_cndmask_b32_e32 v7, v7, v23, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v6, v6, v22, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e32 v23, v23, v7, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v22, v22, v6, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[24:25]
+; GFX900-NEXT:    v_max_f64 v[6:7], v[6:7], v[22:23]
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v25, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v24, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e32 v25, v25, v9, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v24, v24, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[26:27]
+; GFX900-NEXT:    v_max_f64 v[8:9], v[8:9], v[24:25]
+; GFX900-NEXT:    v_cndmask_b32_e32 v11, v11, v27, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v10, v10, v26, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX900-NEXT:    v_cndmask_b32_e32 v27, v27, v11, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v26, v26, v10, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[28:29]
+; GFX900-NEXT:    v_max_f64 v[10:11], v[10:11], v[26:27]
+; GFX900-NEXT:    v_cndmask_b32_e32 v13, v13, v29, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v12, v12, v28, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX900-NEXT:    v_cndmask_b32_e32 v29, v29, v13, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v28, v28, v12, vcc
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_max_f64 v[16:17], v[14:15], v[30:31]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
-; GFX900-NEXT:    v_cndmask_b32_e64 v14, v16, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v15, v17, v34, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[30:31], v[30:31]
+; GFX900-NEXT:    v_max_f64 v[12:13], v[12:13], v[28:29]
+; GFX900-NEXT:    v_cndmask_b32_e32 v15, v15, v31, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v14, v14, v30, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v31, v15, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v16, v30, v14, vcc
+; GFX900-NEXT:    v_max_f64 v[14:15], v[14:15], v[16:17]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v8f64:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    scratch_load_dword v31, off, s32
-; GFX950-NEXT:    v_mov_b32_e32 v54, 0x7ff80000
-; GFX950-NEXT:    v_max_f64 v[32:33], v[0:1], v[16:17]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX950-NEXT:    v_max_f64 v[34:35], v[2:3], v[18:19]
-; GFX950-NEXT:    v_max_f64 v[36:37], v[4:5], v[20:21]
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v33, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
-; GFX950-NEXT:    v_max_f64 v[38:39], v[6:7], v[22:23]
-; GFX950-NEXT:    v_max_f64 v[48:49], v[8:9], v[24:25]
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v34, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v35, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
-; GFX950-NEXT:    v_max_f64 v[50:51], v[10:11], v[26:27]
-; GFX950-NEXT:    v_max_f64 v[52:53], v[12:13], v[28:29]
-; GFX950-NEXT:    v_cndmask_b32_e64 v4, v36, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v5, v37, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
-; GFX950-NEXT:    s_waitcnt vmcnt(0)
-; GFX950-NEXT:    v_max_f64 v[16:17], v[14:15], v[30:31]
-; GFX950-NEXT:    v_cndmask_b32_e64 v6, v38, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v7, v39, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[24:25]
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[16:17]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v17, v17, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v16, v16, v0, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[18:19]
+; GFX950-NEXT:    v_max_f64 v[0:1], v[0:1], v[16:17]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v19, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v2, v18, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v17, v19, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v16, v18, v2, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[20:21]
+; GFX950-NEXT:    v_max_f64 v[2:3], v[2:3], v[16:17]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v5, v21, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v4, v20, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v17, v21, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v16, v20, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[22:23]
+; GFX950-NEXT:    v_max_f64 v[4:5], v[4:5], v[16:17]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v7, v23, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, v6, v22, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v17, v23, v7, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v16, v22, v6, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[24:25]
+; GFX950-NEXT:    v_max_f64 v[6:7], v[6:7], v[16:17]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v9, v25, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v8, v24, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v8, v48, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v9, v49, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
+; GFX950-NEXT:    v_cndmask_b32_e32 v17, v25, v9, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v16, v24, v8, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[26:27]
+; GFX950-NEXT:    v_max_f64 v[8:9], v[8:9], v[16:17]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v11, v11, v27, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v10, v10, v26, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v10, v50, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v11, v51, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
+; GFX950-NEXT:    v_cndmask_b32_e32 v17, v27, v11, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v16, v26, v10, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[28:29]
+; GFX950-NEXT:    v_max_f64 v[10:11], v[10:11], v[16:17]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v13, v13, v29, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v12, v12, v28, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v17, v29, v13, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v16, v28, v12, vcc
+; GFX950-NEXT:    v_max_f64 v[12:13], v[12:13], v[16:17]
+; GFX950-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[30:31], v[30:31]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v12, v52, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v13, v53, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX950-NEXT:    v_cndmask_b32_e32 v15, v15, v31, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v14, v14, v30, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v14, v16, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v15, v17, v54, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v17, v31, v15, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v16, v30, v14, vcc
+; GFX950-NEXT:    v_max_f64 v[14:15], v[14:15], v[16:17]
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v8f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX10-NEXT:    v_max_f64 v[32:33], v[0:1], v[16:17]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
-; GFX10-NEXT:    v_max_f64 v[16:17], v[2:3], v[18:19]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[18:19]
-; GFX10-NEXT:    v_max_f64 v[18:19], v[4:5], v[20:21]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[20:21]
-; GFX10-NEXT:    v_max_f64 v[20:21], v[6:7], v[22:23]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[6:7], v[22:23]
-; GFX10-NEXT:    v_max_f64 v[22:23], v[8:9], v[24:25]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s7, v[8:9], v[24:25]
-; GFX10-NEXT:    v_max_f64 v[24:25], v[10:11], v[26:27]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s8, v[10:11], v[26:27]
-; GFX10-NEXT:    v_max_f64 v[26:27], v[12:13], v[28:29]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s9, v[12:13], v[28:29]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v33, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v16, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v17, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v18, 0, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v19, 0x7ff80000, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, v20, 0, s6
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, v21, 0x7ff80000, s6
-; GFX10-NEXT:    v_cndmask_b32_e64 v8, v22, 0, s7
-; GFX10-NEXT:    v_cndmask_b32_e64 v9, v23, 0x7ff80000, s7
-; GFX10-NEXT:    v_cndmask_b32_e64 v10, v24, 0, s8
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, v25, 0x7ff80000, s8
-; GFX10-NEXT:    v_cndmask_b32_e64 v12, v26, 0, s9
-; GFX10-NEXT:    v_cndmask_b32_e64 v13, v27, 0x7ff80000, s9
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[16:17], v[16:17]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[18:19], v[18:19]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[20:21], v[20:21]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[22:23], v[22:23]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s7, v[24:25], v[24:25]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s8, v[26:27], v[26:27]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s9, v[28:29], v[28:29]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v19, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v21, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, v23, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v9, v9, v25, s7
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, v11, v27, s8
+; GFX10-NEXT:    v_cndmask_b32_e64 v13, v13, v29, s9
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v18, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, v4, v20, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v22, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, v24, s7
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, v10, v26, s8
+; GFX10-NEXT:    v_cndmask_b32_e64 v12, v12, v28, s9
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[4:5]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[6:7], v[6:7]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s7, v[8:9], v[8:9]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s8, v[10:11], v[10:11]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s9, v[12:13], v[12:13]
+; GFX10-NEXT:    v_cndmask_b32_e32 v17, v17, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v16, v16, v0, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v19, v19, v3, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v21, v21, v5, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v23, v23, v7, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v25, v25, v9, s7
+; GFX10-NEXT:    v_cndmask_b32_e64 v27, v27, v11, s8
+; GFX10-NEXT:    v_cndmask_b32_e64 v29, v29, v13, s9
+; GFX10-NEXT:    v_cndmask_b32_e64 v18, v18, v2, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v20, v20, v4, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v22, v22, v6, s6
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[16:17]
+; GFX10-NEXT:    v_cndmask_b32_e64 v24, v24, v8, s7
+; GFX10-NEXT:    v_cndmask_b32_e64 v26, v26, v10, s8
+; GFX10-NEXT:    v_cndmask_b32_e64 v28, v28, v12, s9
+; GFX10-NEXT:    v_max_f64 v[2:3], v[2:3], v[18:19]
+; GFX10-NEXT:    v_max_f64 v[4:5], v[4:5], v[20:21]
+; GFX10-NEXT:    v_max_f64 v[6:7], v[6:7], v[22:23]
+; GFX10-NEXT:    v_max_f64 v[8:9], v[8:9], v[24:25]
+; GFX10-NEXT:    v_max_f64 v[10:11], v[10:11], v[26:27]
+; GFX10-NEXT:    v_max_f64 v[12:13], v[12:13], v[28:29]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_max_f64 v[28:29], v[14:15], v[30:31]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s10, v[14:15], v[30:31]
-; GFX10-NEXT:    v_cndmask_b32_e64 v14, v28, 0, s10
-; GFX10-NEXT:    v_cndmask_b32_e64 v15, v29, 0x7ff80000, s10
+; GFX10-NEXT:    v_cmp_u_f64_e64 s10, v[30:31], v[30:31]
+; GFX10-NEXT:    v_cndmask_b32_e64 v15, v15, v31, s10
+; GFX10-NEXT:    v_cndmask_b32_e64 v14, v14, v30, s10
+; GFX10-NEXT:    v_cmp_u_f64_e64 s10, v[14:15], v[14:15]
+; GFX10-NEXT:    v_cndmask_b32_e64 v17, v31, v15, s10
+; GFX10-NEXT:    v_cndmask_b32_e64 v16, v30, v14, s10
+; GFX10-NEXT:    v_max_f64 v[14:15], v[14:15], v[16:17]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_v8f64:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    scratch_load_b32 v31, off, s32
-; GFX11-NEXT:    v_max_f64 v[32:33], v[0:1], v[16:17]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
-; GFX11-NEXT:    v_max_f64 v[16:17], v[2:3], v[18:19]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[18:19]
-; GFX11-NEXT:    v_max_f64 v[18:19], v[4:5], v[20:21]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[20:21]
-; GFX11-NEXT:    v_max_f64 v[20:21], v[6:7], v[22:23]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[6:7], v[22:23]
-; GFX11-NEXT:    v_max_f64 v[22:23], v[8:9], v[24:25]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s3, v[8:9], v[24:25]
-; GFX11-NEXT:    v_max_f64 v[24:25], v[10:11], v[26:27]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s4, v[10:11], v[26:27]
-; GFX11-NEXT:    v_max_f64 v[26:27], v[12:13], v[28:29]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s5, v[12:13], v[28:29]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v33, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v16, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v17, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v18, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, v19, 0x7ff80000, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v6, v20, 0, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v7, v21, 0x7ff80000, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v8, v22, 0, s3
-; GFX11-NEXT:    v_cndmask_b32_e64 v9, v23, 0x7ff80000, s3
-; GFX11-NEXT:    v_cndmask_b32_e64 v10, v24, 0, s4
-; GFX11-NEXT:    v_cndmask_b32_e64 v11, v25, 0x7ff80000, s4
-; GFX11-NEXT:    v_cndmask_b32_e64 v12, v26, 0, s5
-; GFX11-NEXT:    v_cndmask_b32_e64 v13, v27, 0x7ff80000, s5
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[16:17], v[16:17]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[18:19], v[18:19]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[20:21], v[20:21]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[22:23], v[22:23]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s3, v[24:25], v[24:25]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s4, v[26:27], v[26:27]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s5, v[28:29], v[28:29]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v17 :: v_dual_cndmask_b32 v0, v0, v16
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, v19, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, v5, v21, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v7, v7, v23, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v9, v9, v25, s3
+; GFX11-NEXT:    v_cndmask_b32_e64 v11, v11, v27, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v13, v13, v29, s5
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, v18, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, v4, v20, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v6, v6, v22, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v8, v8, v24, s3
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_cndmask_b32_e64 v10, v10, v26, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v12, v12, v28, s5
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[2:3]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[4:5]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[6:7], v[6:7]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s3, v[8:9], v[8:9]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s4, v[10:11], v[10:11]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s5, v[12:13], v[12:13]
+; GFX11-NEXT:    v_dual_cndmask_b32 v17, v17, v1 :: v_dual_cndmask_b32 v16, v16, v0
+; GFX11-NEXT:    v_cndmask_b32_e64 v19, v19, v3, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v21, v21, v5, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v23, v23, v7, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v25, v25, v9, s3
+; GFX11-NEXT:    v_cndmask_b32_e64 v27, v27, v11, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v29, v29, v13, s5
+; GFX11-NEXT:    v_cndmask_b32_e64 v18, v18, v2, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v20, v20, v4, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v22, v22, v6, s2
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[16:17]
+; GFX11-NEXT:    v_cndmask_b32_e64 v24, v24, v8, s3
+; GFX11-NEXT:    v_cndmask_b32_e64 v26, v26, v10, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v28, v28, v12, s5
+; GFX11-NEXT:    v_max_f64 v[2:3], v[2:3], v[18:19]
+; GFX11-NEXT:    v_max_f64 v[4:5], v[4:5], v[20:21]
+; GFX11-NEXT:    v_max_f64 v[6:7], v[6:7], v[22:23]
+; GFX11-NEXT:    v_max_f64 v[8:9], v[8:9], v[24:25]
+; GFX11-NEXT:    v_max_f64 v[10:11], v[10:11], v[26:27]
+; GFX11-NEXT:    v_max_f64 v[12:13], v[12:13], v[28:29]
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_max_f64 v[28:29], v[14:15], v[30:31]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s6, v[14:15], v[30:31]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v14, v28, 0, s6
-; GFX11-NEXT:    v_cndmask_b32_e64 v15, v29, 0x7ff80000, s6
+; GFX11-NEXT:    v_cmp_u_f64_e64 s6, v[30:31], v[30:31]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e64 v15, v15, v31, s6
+; GFX11-NEXT:    v_cndmask_b32_e64 v14, v14, v30, s6
+; GFX11-NEXT:    v_cmp_u_f64_e64 s6, v[14:15], v[14:15]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e64 v17, v31, v15, s6
+; GFX11-NEXT:    v_cndmask_b32_e64 v16, v30, v14, s6
+; GFX11-NEXT:    v_max_f64 v[14:15], v[14:15], v[16:17]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v8f64:
@@ -2010,118 +2604,165 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:8
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX7-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:16
+; GFX7-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:12
+; GFX7-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:24
+; GFX7-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:20
+; GFX7-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:32
+; GFX7-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:28
+; GFX7-NEXT:    buffer_load_dword v49, off, s[0:3], s32 offset:40
+; GFX7-NEXT:    buffer_load_dword v48, off, s[0:3], s32 offset:36
+; GFX7-NEXT:    buffer_load_dword v51, off, s[0:3], s32 offset:48
+; GFX7-NEXT:    buffer_load_dword v50, off, s[0:3], s32 offset:44
+; GFX7-NEXT:    buffer_load_dword v53, off, s[0:3], s32 offset:56
+; GFX7-NEXT:    buffer_load_dword v52, off, s[0:3], s32 offset:52
+; GFX7-NEXT:    s_waitcnt vmcnt(12)
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v32, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v31, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v32, v32, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v31, v31, v0, vcc
+; GFX7-NEXT:    s_waitcnt vmcnt(10)
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[33:34], v[33:34]
 ; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[31:32]
-; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:16
-; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:12
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
-; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
-; GFX7-NEXT:    v_max_f64 v[2:3], v[2:3], v[31:32]
-; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:24
-; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:20
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v2, 0, s[4:5]
-; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
-; GFX7-NEXT:    v_max_f64 v[4:5], v[4:5], v[31:32]
-; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:32
-; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:28
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v4, 0, s[6:7]
-; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
-; GFX7-NEXT:    v_max_f64 v[6:7], v[6:7], v[31:32]
-; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:36
-; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:40
-; GFX7-NEXT:    v_cndmask_b32_e64 v6, v6, 0, s[8:9]
-; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
-; GFX7-NEXT:    v_max_f64 v[8:9], v[8:9], v[31:32]
-; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:48
-; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:44
-; GFX7-NEXT:    v_cndmask_b32_e64 v8, v8, 0, s[10:11]
-; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
-; GFX7-NEXT:    v_max_f64 v[10:11], v[10:11], v[31:32]
-; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:56
-; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX7-NEXT:    v_cndmask_b32_e64 v10, v10, 0, s[12:13]
-; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
-; GFX7-NEXT:    v_max_f64 v[12:13], v[12:13], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:64
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:60
-; GFX7-NEXT:    v_cndmask_b32_e64 v12, v12, 0, s[14:15]
-; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v34, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v33, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e32 v34, v34, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v33, v33, v2, vcc
+; GFX7-NEXT:    s_waitcnt vmcnt(10)
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[35:36], v[35:36]
+; GFX7-NEXT:    v_max_f64 v[2:3], v[2:3], v[33:34]
+; GFX7-NEXT:    buffer_load_dword v34, off, s[0:3], s32
+; GFX7-NEXT:    v_cndmask_b32_e32 v5, v5, v36, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v4, v35, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v36, v36, v5, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v35, v35, v4, vcc
+; GFX7-NEXT:    s_waitcnt vmcnt(9)
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[37:38], v[37:38]
+; GFX7-NEXT:    v_max_f64 v[4:5], v[4:5], v[35:36]
+; GFX7-NEXT:    v_cndmask_b32_e32 v7, v7, v38, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v6, v6, v37, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v38, v38, v7, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v37, v37, v6, vcc
+; GFX7-NEXT:    s_waitcnt vmcnt(7)
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[48:49], v[48:49]
+; GFX7-NEXT:    v_max_f64 v[6:7], v[6:7], v[37:38]
+; GFX7-NEXT:    v_cndmask_b32_e32 v9, v9, v49, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v8, v48, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e32 v49, v49, v9, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v48, v48, v8, vcc
+; GFX7-NEXT:    s_waitcnt vmcnt(5)
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[50:51], v[50:51]
+; GFX7-NEXT:    v_max_f64 v[8:9], v[8:9], v[48:49]
+; GFX7-NEXT:    v_cndmask_b32_e32 v11, v11, v51, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v10, v10, v50, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX7-NEXT:    v_cndmask_b32_e32 v51, v51, v11, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v50, v50, v10, vcc
+; GFX7-NEXT:    s_waitcnt vmcnt(3)
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[52:53], v[52:53]
+; GFX7-NEXT:    v_max_f64 v[10:11], v[10:11], v[50:51]
+; GFX7-NEXT:    v_cndmask_b32_e32 v13, v13, v53, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v12, v12, v52, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX7-NEXT:    v_cndmask_b32_e32 v53, v53, v13, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v52, v52, v12, vcc
+; GFX7-NEXT:    s_waitcnt vmcnt(1)
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX7-NEXT:    v_max_f64 v[12:13], v[12:13], v[52:53]
+; GFX7-NEXT:    v_cndmask_b32_e32 v15, v15, v32, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v14, v14, v31, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX7-NEXT:    v_cndmask_b32_e32 v32, v32, v15, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v31, v31, v14, vcc
 ; GFX7-NEXT:    v_max_f64 v[14:15], v[14:15], v[31:32]
-; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:68
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:72
-; GFX7-NEXT:    v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:68
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v17, v32, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v16, v16, v31, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[16:17]
+; GFX7-NEXT:    v_cndmask_b32_e32 v32, v32, v17, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v31, v31, v16, vcc
 ; GFX7-NEXT:    v_max_f64 v[16:17], v[16:17], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:80
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:76
-; GFX7-NEXT:    v_cndmask_b32_e64 v16, v16, 0, s[18:19]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX7-NEXT:    v_cndmask_b32_e32 v19, v19, v32, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v18, v18, v31, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[18:19]
+; GFX7-NEXT:    v_cndmask_b32_e32 v32, v32, v19, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v31, v31, v18, vcc
 ; GFX7-NEXT:    v_max_f64 v[18:19], v[18:19], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:88
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX7-NEXT:    v_cndmask_b32_e64 v18, v18, 0, s[20:21]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX7-NEXT:    v_cndmask_b32_e32 v21, v21, v32, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v20, v20, v31, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[20:21]
+; GFX7-NEXT:    v_cndmask_b32_e32 v32, v32, v21, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v31, v31, v20, vcc
 ; GFX7-NEXT:    v_max_f64 v[20:21], v[20:21], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:96
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:92
-; GFX7-NEXT:    v_cndmask_b32_e64 v20, v20, 0, s[22:23]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX7-NEXT:    v_cndmask_b32_e32 v23, v23, v32, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v22, v22, v31, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[22:23]
+; GFX7-NEXT:    v_cndmask_b32_e32 v32, v32, v23, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v31, v31, v22, vcc
 ; GFX7-NEXT:    v_max_f64 v[22:23], v[22:23], v[31:32]
-; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:100
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:104
-; GFX7-NEXT:    v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:100
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX7-NEXT:    v_cndmask_b32_e32 v25, v25, v32, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v24, v24, v31, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[24:25]
+; GFX7-NEXT:    v_cndmask_b32_e32 v32, v32, v25, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v31, v31, v24, vcc
 ; GFX7-NEXT:    v_max_f64 v[24:25], v[24:25], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:112
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:108
-; GFX7-NEXT:    v_cndmask_b32_e64 v24, v24, 0, s[26:27]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX7-NEXT:    v_cndmask_b32_e32 v27, v27, v32, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v26, v26, v31, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[26:27]
+; GFX7-NEXT:    v_cndmask_b32_e32 v32, v32, v27, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v31, v31, v26, vcc
 ; GFX7-NEXT:    v_max_f64 v[26:27], v[26:27], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:120
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:116
-; GFX7-NEXT:    v_cndmask_b32_e64 v26, v26, 0, s[28:29]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX7-NEXT:    v_cndmask_b32_e32 v29, v29, v32, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v28, v28, v31, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[28:29]
+; GFX7-NEXT:    v_cndmask_b32_e32 v32, v32, v29, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v31, v31, v28, vcc
 ; GFX7-NEXT:    v_max_f64 v[28:29], v[28:29], v[31:32]
-; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX7-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:128
-; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX7-NEXT:    v_cndmask_b32_e64 v28, v28, 0, s[40:41]
+; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:128
+; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:124
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33]
-; GFX7-NEXT:    v_max_f64 v[30:31], v[30:31], v[32:33]
-; GFX7-NEXT:    v_mov_b32_e32 v32, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v32, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v3, v32, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v5, v32, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v7, v7, v32, s[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e64 v9, v9, v32, s[10:11]
-; GFX7-NEXT:    v_cndmask_b32_e64 v11, v11, v32, s[12:13]
-; GFX7-NEXT:    v_cndmask_b32_e64 v13, v13, v32, s[14:15]
-; GFX7-NEXT:    v_cndmask_b32_e64 v15, v15, v32, s[16:17]
-; GFX7-NEXT:    v_cndmask_b32_e64 v17, v17, v32, s[18:19]
-; GFX7-NEXT:    v_cndmask_b32_e64 v19, v19, v32, s[20:21]
-; GFX7-NEXT:    v_cndmask_b32_e64 v21, v21, v32, s[22:23]
-; GFX7-NEXT:    v_cndmask_b32_e64 v23, v23, v32, s[24:25]
-; GFX7-NEXT:    v_cndmask_b32_e64 v25, v25, v32, s[26:27]
-; GFX7-NEXT:    v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX7-NEXT:    v_cndmask_b32_e64 v29, v29, v32, s[40:41]
-; GFX7-NEXT:    v_cndmask_b32_e64 v31, v31, v32, s[42:43]
-; GFX7-NEXT:    v_cndmask_b32_e64 v30, v30, 0, s[42:43]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX7-NEXT:    v_cndmask_b32_e32 v33, v30, v31, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v34, v34, v32, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[33:34], v[33:34]
+; GFX7-NEXT:    v_cndmask_b32_e32 v32, v32, v34, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v31, v31, v33, vcc
+; GFX7-NEXT:    v_max_f64 v[30:31], v[33:34], v[31:32]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v16f64:
@@ -2129,118 +2770,165 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:8
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX8-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:16
+; GFX8-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:12
+; GFX8-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:24
+; GFX8-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:20
+; GFX8-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:32
+; GFX8-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:28
+; GFX8-NEXT:    buffer_load_dword v49, off, s[0:3], s32 offset:40
+; GFX8-NEXT:    buffer_load_dword v48, off, s[0:3], s32 offset:36
+; GFX8-NEXT:    buffer_load_dword v51, off, s[0:3], s32 offset:48
+; GFX8-NEXT:    buffer_load_dword v50, off, s[0:3], s32 offset:44
+; GFX8-NEXT:    buffer_load_dword v53, off, s[0:3], s32 offset:56
+; GFX8-NEXT:    buffer_load_dword v52, off, s[0:3], s32 offset:52
+; GFX8-NEXT:    s_waitcnt vmcnt(12)
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v32, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v31, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v32, v32, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v31, v31, v0, vcc
+; GFX8-NEXT:    s_waitcnt vmcnt(10)
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[33:34], v[33:34]
 ; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[31:32]
-; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:16
-; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:12
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
-; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
-; GFX8-NEXT:    v_max_f64 v[2:3], v[2:3], v[31:32]
-; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:24
-; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:20
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, 0, s[4:5]
-; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
-; GFX8-NEXT:    v_max_f64 v[4:5], v[4:5], v[31:32]
-; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:32
-; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:28
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v4, 0, s[6:7]
-; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
-; GFX8-NEXT:    v_max_f64 v[6:7], v[6:7], v[31:32]
-; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:36
-; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:40
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, v6, 0, s[8:9]
-; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
-; GFX8-NEXT:    v_max_f64 v[8:9], v[8:9], v[31:32]
-; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:48
-; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:44
-; GFX8-NEXT:    v_cndmask_b32_e64 v8, v8, 0, s[10:11]
-; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
-; GFX8-NEXT:    v_max_f64 v[10:11], v[10:11], v[31:32]
-; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:56
-; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX8-NEXT:    v_cndmask_b32_e64 v10, v10, 0, s[12:13]
-; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
-; GFX8-NEXT:    v_max_f64 v[12:13], v[12:13], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:64
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:60
-; GFX8-NEXT:    v_cndmask_b32_e64 v12, v12, 0, s[14:15]
-; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v34, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v33, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e32 v34, v34, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v33, v33, v2, vcc
+; GFX8-NEXT:    s_waitcnt vmcnt(10)
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[35:36], v[35:36]
+; GFX8-NEXT:    v_max_f64 v[2:3], v[2:3], v[33:34]
+; GFX8-NEXT:    buffer_load_dword v34, off, s[0:3], s32
+; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v36, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v35, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v36, v36, v5, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v35, v35, v4, vcc
+; GFX8-NEXT:    s_waitcnt vmcnt(9)
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[37:38], v[37:38]
+; GFX8-NEXT:    v_max_f64 v[4:5], v[4:5], v[35:36]
+; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v38, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v37, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v38, v38, v7, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v37, v37, v6, vcc
+; GFX8-NEXT:    s_waitcnt vmcnt(7)
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[48:49], v[48:49]
+; GFX8-NEXT:    v_max_f64 v[6:7], v[6:7], v[37:38]
+; GFX8-NEXT:    v_cndmask_b32_e32 v9, v9, v49, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v8, v48, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e32 v49, v49, v9, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v48, v48, v8, vcc
+; GFX8-NEXT:    s_waitcnt vmcnt(5)
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[50:51], v[50:51]
+; GFX8-NEXT:    v_max_f64 v[8:9], v[8:9], v[48:49]
+; GFX8-NEXT:    v_cndmask_b32_e32 v11, v11, v51, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v10, v10, v50, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e32 v51, v51, v11, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v50, v50, v10, vcc
+; GFX8-NEXT:    s_waitcnt vmcnt(3)
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[52:53], v[52:53]
+; GFX8-NEXT:    v_max_f64 v[10:11], v[10:11], v[50:51]
+; GFX8-NEXT:    v_cndmask_b32_e32 v13, v13, v53, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v12, v12, v52, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX8-NEXT:    v_cndmask_b32_e32 v53, v53, v13, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v52, v52, v12, vcc
+; GFX8-NEXT:    s_waitcnt vmcnt(1)
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX8-NEXT:    v_max_f64 v[12:13], v[12:13], v[52:53]
+; GFX8-NEXT:    v_cndmask_b32_e32 v15, v15, v32, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v14, v14, v31, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX8-NEXT:    v_cndmask_b32_e32 v32, v32, v15, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v31, v31, v14, vcc
 ; GFX8-NEXT:    v_max_f64 v[14:15], v[14:15], v[31:32]
-; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:68
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:72
-; GFX8-NEXT:    v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:68
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v17, v32, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v16, v16, v31, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[16:17]
+; GFX8-NEXT:    v_cndmask_b32_e32 v32, v32, v17, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v31, v31, v16, vcc
 ; GFX8-NEXT:    v_max_f64 v[16:17], v[16:17], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:80
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:76
-; GFX8-NEXT:    v_cndmask_b32_e64 v16, v16, 0, s[18:19]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX8-NEXT:    v_cndmask_b32_e32 v19, v19, v32, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v18, v18, v31, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[18:19]
+; GFX8-NEXT:    v_cndmask_b32_e32 v32, v32, v19, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v31, v31, v18, vcc
 ; GFX8-NEXT:    v_max_f64 v[18:19], v[18:19], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:88
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX8-NEXT:    v_cndmask_b32_e64 v18, v18, 0, s[20:21]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX8-NEXT:    v_cndmask_b32_e32 v21, v21, v32, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v20, v20, v31, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[20:21]
+; GFX8-NEXT:    v_cndmask_b32_e32 v32, v32, v21, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v31, v31, v20, vcc
 ; GFX8-NEXT:    v_max_f64 v[20:21], v[20:21], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:96
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:92
-; GFX8-NEXT:    v_cndmask_b32_e64 v20, v20, 0, s[22:23]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX8-NEXT:    v_cndmask_b32_e32 v23, v23, v32, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v22, v22, v31, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[22:23]
+; GFX8-NEXT:    v_cndmask_b32_e32 v32, v32, v23, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v31, v31, v22, vcc
 ; GFX8-NEXT:    v_max_f64 v[22:23], v[22:23], v[31:32]
-; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:100
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:104
-; GFX8-NEXT:    v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:100
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX8-NEXT:    v_cndmask_b32_e32 v25, v25, v32, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v24, v24, v31, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[24:25]
+; GFX8-NEXT:    v_cndmask_b32_e32 v32, v32, v25, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v31, v31, v24, vcc
 ; GFX8-NEXT:    v_max_f64 v[24:25], v[24:25], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:112
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:108
-; GFX8-NEXT:    v_cndmask_b32_e64 v24, v24, 0, s[26:27]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX8-NEXT:    v_cndmask_b32_e32 v27, v27, v32, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v26, v26, v31, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[26:27]
+; GFX8-NEXT:    v_cndmask_b32_e32 v32, v32, v27, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v31, v31, v26, vcc
 ; GFX8-NEXT:    v_max_f64 v[26:27], v[26:27], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:120
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:116
-; GFX8-NEXT:    v_cndmask_b32_e64 v26, v26, 0, s[28:29]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX8-NEXT:    v_cndmask_b32_e32 v29, v29, v32, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v28, v28, v31, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[28:29]
+; GFX8-NEXT:    v_cndmask_b32_e32 v32, v32, v29, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v31, v31, v28, vcc
 ; GFX8-NEXT:    v_max_f64 v[28:29], v[28:29], v[31:32]
-; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX8-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:128
-; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX8-NEXT:    v_cndmask_b32_e64 v28, v28, 0, s[40:41]
+; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:128
+; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:124
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33]
-; GFX8-NEXT:    v_max_f64 v[30:31], v[30:31], v[32:33]
-; GFX8-NEXT:    v_mov_b32_e32 v32, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v32, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v32, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v5, v32, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, v7, v32, s[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e64 v9, v9, v32, s[10:11]
-; GFX8-NEXT:    v_cndmask_b32_e64 v11, v11, v32, s[12:13]
-; GFX8-NEXT:    v_cndmask_b32_e64 v13, v13, v32, s[14:15]
-; GFX8-NEXT:    v_cndmask_b32_e64 v15, v15, v32, s[16:17]
-; GFX8-NEXT:    v_cndmask_b32_e64 v17, v17, v32, s[18:19]
-; GFX8-NEXT:    v_cndmask_b32_e64 v19, v19, v32, s[20:21]
-; GFX8-NEXT:    v_cndmask_b32_e64 v21, v21, v32, s[22:23]
-; GFX8-NEXT:    v_cndmask_b32_e64 v23, v23, v32, s[24:25]
-; GFX8-NEXT:    v_cndmask_b32_e64 v25, v25, v32, s[26:27]
-; GFX8-NEXT:    v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX8-NEXT:    v_cndmask_b32_e64 v29, v29, v32, s[40:41]
-; GFX8-NEXT:    v_cndmask_b32_e64 v31, v31, v32, s[42:43]
-; GFX8-NEXT:    v_cndmask_b32_e64 v30, v30, 0, s[42:43]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX8-NEXT:    v_cndmask_b32_e32 v33, v30, v31, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v34, v34, v32, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[33:34], v[33:34]
+; GFX8-NEXT:    v_cndmask_b32_e32 v32, v32, v34, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v31, v31, v33, vcc
+; GFX8-NEXT:    v_max_f64 v[30:31], v[33:34], v[31:32]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v16f64:
@@ -2248,118 +2936,165 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:8
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX900-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:16
+; GFX900-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:12
+; GFX900-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:24
+; GFX900-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:20
+; GFX900-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:32
+; GFX900-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:28
+; GFX900-NEXT:    buffer_load_dword v49, off, s[0:3], s32 offset:40
+; GFX900-NEXT:    buffer_load_dword v48, off, s[0:3], s32 offset:36
+; GFX900-NEXT:    buffer_load_dword v51, off, s[0:3], s32 offset:48
+; GFX900-NEXT:    buffer_load_dword v50, off, s[0:3], s32 offset:44
+; GFX900-NEXT:    buffer_load_dword v53, off, s[0:3], s32 offset:56
+; GFX900-NEXT:    buffer_load_dword v52, off, s[0:3], s32 offset:52
+; GFX900-NEXT:    s_waitcnt vmcnt(12)
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v32, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v31, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT:    v_cndmask_b32_e32 v32, v32, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v31, v31, v0, vcc
+; GFX900-NEXT:    s_waitcnt vmcnt(10)
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[33:34], v[33:34]
 ; GFX900-NEXT:    v_max_f64 v[0:1], v[0:1], v[31:32]
-; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:16
-; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:12
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
-; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
-; GFX900-NEXT:    v_max_f64 v[2:3], v[2:3], v[31:32]
-; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:24
-; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:20
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v2, 0, s[4:5]
-; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
-; GFX900-NEXT:    v_max_f64 v[4:5], v[4:5], v[31:32]
-; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:32
-; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:28
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v4, 0, s[6:7]
-; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
-; GFX900-NEXT:    v_max_f64 v[6:7], v[6:7], v[31:32]
-; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:36
-; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:40
-; GFX900-NEXT:    v_cndmask_b32_e64 v6, v6, 0, s[8:9]
-; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
-; GFX900-NEXT:    v_max_f64 v[8:9], v[8:9], v[31:32]
-; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:48
-; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:44
-; GFX900-NEXT:    v_cndmask_b32_e64 v8, v8, 0, s[10:11]
-; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
-; GFX900-NEXT:    v_max_f64 v[10:11], v[10:11], v[31:32]
-; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:56
-; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX900-NEXT:    v_cndmask_b32_e64 v10, v10, 0, s[12:13]
-; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
-; GFX900-NEXT:    v_max_f64 v[12:13], v[12:13], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:64
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:60
-; GFX900-NEXT:    v_cndmask_b32_e64 v12, v12, 0, s[14:15]
-; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v34, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v33, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX900-NEXT:    v_cndmask_b32_e32 v34, v34, v3, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v33, v33, v2, vcc
+; GFX900-NEXT:    s_waitcnt vmcnt(10)
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[35:36], v[35:36]
+; GFX900-NEXT:    v_max_f64 v[2:3], v[2:3], v[33:34]
+; GFX900-NEXT:    buffer_load_dword v34, off, s[0:3], s32
+; GFX900-NEXT:    v_cndmask_b32_e32 v5, v5, v36, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v35, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e32 v36, v36, v5, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v35, v35, v4, vcc
+; GFX900-NEXT:    s_waitcnt vmcnt(9)
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[37:38], v[37:38]
+; GFX900-NEXT:    v_max_f64 v[4:5], v[4:5], v[35:36]
+; GFX900-NEXT:    v_cndmask_b32_e32 v7, v7, v38, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v6, v6, v37, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e32 v38, v38, v7, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v37, v37, v6, vcc
+; GFX900-NEXT:    s_waitcnt vmcnt(7)
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[48:49], v[48:49]
+; GFX900-NEXT:    v_max_f64 v[6:7], v[6:7], v[37:38]
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v49, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v48, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e32 v49, v49, v9, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v48, v48, v8, vcc
+; GFX900-NEXT:    s_waitcnt vmcnt(5)
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[50:51], v[50:51]
+; GFX900-NEXT:    v_max_f64 v[8:9], v[8:9], v[48:49]
+; GFX900-NEXT:    v_cndmask_b32_e32 v11, v11, v51, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v10, v10, v50, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX900-NEXT:    v_cndmask_b32_e32 v51, v51, v11, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v50, v50, v10, vcc
+; GFX900-NEXT:    s_waitcnt vmcnt(3)
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[52:53], v[52:53]
+; GFX900-NEXT:    v_max_f64 v[10:11], v[10:11], v[50:51]
+; GFX900-NEXT:    v_cndmask_b32_e32 v13, v13, v53, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v12, v12, v52, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX900-NEXT:    v_cndmask_b32_e32 v53, v53, v13, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v52, v52, v12, vcc
+; GFX900-NEXT:    s_waitcnt vmcnt(1)
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX900-NEXT:    v_max_f64 v[12:13], v[12:13], v[52:53]
+; GFX900-NEXT:    v_cndmask_b32_e32 v15, v15, v32, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v14, v14, v31, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX900-NEXT:    v_cndmask_b32_e32 v32, v32, v15, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v31, v31, v14, vcc
 ; GFX900-NEXT:    v_max_f64 v[14:15], v[14:15], v[31:32]
-; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:68
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:72
-; GFX900-NEXT:    v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:68
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v17, v32, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v16, v16, v31, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[16:17]
+; GFX900-NEXT:    v_cndmask_b32_e32 v32, v32, v17, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v31, v31, v16, vcc
 ; GFX900-NEXT:    v_max_f64 v[16:17], v[16:17], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:80
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:76
-; GFX900-NEXT:    v_cndmask_b32_e64 v16, v16, 0, s[18:19]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX900-NEXT:    v_cndmask_b32_e32 v19, v19, v32, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v18, v18, v31, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[18:19]
+; GFX900-NEXT:    v_cndmask_b32_e32 v32, v32, v19, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v31, v31, v18, vcc
 ; GFX900-NEXT:    v_max_f64 v[18:19], v[18:19], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:88
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX900-NEXT:    v_cndmask_b32_e64 v18, v18, 0, s[20:21]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX900-NEXT:    v_cndmask_b32_e32 v21, v21, v32, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v20, v20, v31, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[20:21]
+; GFX900-NEXT:    v_cndmask_b32_e32 v32, v32, v21, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v31, v31, v20, vcc
 ; GFX900-NEXT:    v_max_f64 v[20:21], v[20:21], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:96
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:92
-; GFX900-NEXT:    v_cndmask_b32_e64 v20, v20, 0, s[22:23]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX900-NEXT:    v_cndmask_b32_e32 v23, v23, v32, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v22, v22, v31, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[22:23]
+; GFX900-NEXT:    v_cndmask_b32_e32 v32, v32, v23, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v31, v31, v22, vcc
 ; GFX900-NEXT:    v_max_f64 v[22:23], v[22:23], v[31:32]
-; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:100
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:104
-; GFX900-NEXT:    v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:100
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX900-NEXT:    v_cndmask_b32_e32 v25, v25, v32, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v24, v24, v31, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[24:25]
+; GFX900-NEXT:    v_cndmask_b32_e32 v32, v32, v25, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v31, v31, v24, vcc
 ; GFX900-NEXT:    v_max_f64 v[24:25], v[24:25], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:112
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:108
-; GFX900-NEXT:    v_cndmask_b32_e64 v24, v24, 0, s[26:27]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX900-NEXT:    v_cndmask_b32_e32 v27, v27, v32, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v26, v26, v31, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[26:27]
+; GFX900-NEXT:    v_cndmask_b32_e32 v32, v32, v27, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v31, v31, v26, vcc
 ; GFX900-NEXT:    v_max_f64 v[26:27], v[26:27], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:120
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:116
-; GFX900-NEXT:    v_cndmask_b32_e64 v26, v26, 0, s[28:29]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX900-NEXT:    v_cndmask_b32_e32 v29, v29, v32, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v28, v28, v31, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[28:29]
+; GFX900-NEXT:    v_cndmask_b32_e32 v32, v32, v29, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v31, v31, v28, vcc
 ; GFX900-NEXT:    v_max_f64 v[28:29], v[28:29], v[31:32]
-; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX900-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:128
-; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX900-NEXT:    v_cndmask_b32_e64 v28, v28, 0, s[40:41]
+; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:128
+; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:124
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33]
-; GFX900-NEXT:    v_max_f64 v[30:31], v[30:31], v[32:33]
-; GFX900-NEXT:    v_mov_b32_e32 v32, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v32, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v3, v32, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v5, v32, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v7, v7, v32, s[8:9]
-; GFX900-NEXT:    v_cndmask_b32_e64 v9, v9, v32, s[10:11]
-; GFX900-NEXT:    v_cndmask_b32_e64 v11, v11, v32, s[12:13]
-; GFX900-NEXT:    v_cndmask_b32_e64 v13, v13, v32, s[14:15]
-; GFX900-NEXT:    v_cndmask_b32_e64 v15, v15, v32, s[16:17]
-; GFX900-NEXT:    v_cndmask_b32_e64 v17, v17, v32, s[18:19]
-; GFX900-NEXT:    v_cndmask_b32_e64 v19, v19, v32, s[20:21]
-; GFX900-NEXT:    v_cndmask_b32_e64 v21, v21, v32, s[22:23]
-; GFX900-NEXT:    v_cndmask_b32_e64 v23, v23, v32, s[24:25]
-; GFX900-NEXT:    v_cndmask_b32_e64 v25, v25, v32, s[26:27]
-; GFX900-NEXT:    v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX900-NEXT:    v_cndmask_b32_e64 v29, v29, v32, s[40:41]
-; GFX900-NEXT:    v_cndmask_b32_e64 v31, v31, v32, s[42:43]
-; GFX900-NEXT:    v_cndmask_b32_e64 v30, v30, 0, s[42:43]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX900-NEXT:    v_cndmask_b32_e32 v33, v30, v31, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v34, v34, v32, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[33:34], v[33:34]
+; GFX900-NEXT:    v_cndmask_b32_e32 v32, v32, v34, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v31, v31, v33, vcc
+; GFX900-NEXT:    v_max_f64 v[30:31], v[33:34], v[31:32]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v16f64:
@@ -2375,12 +3110,12 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX950-NEXT:    v_accvgpr_write_b32 a7, v47 ; Reload Reuse
 ; GFX950-NEXT:    v_accvgpr_write_b32 a8, v56 ; Reload Reuse
 ; GFX950-NEXT:    v_accvgpr_write_b32 a9, v57 ; Reload Reuse
-; GFX950-NEXT:    scratch_load_dword v35, off, s32 offset:8
-; GFX950-NEXT:    scratch_load_dword v34, off, s32 offset:4
-; GFX950-NEXT:    scratch_load_dword v37, off, s32 offset:16
-; GFX950-NEXT:    scratch_load_dword v36, off, s32 offset:12
-; GFX950-NEXT:    scratch_load_dword v39, off, s32 offset:24
-; GFX950-NEXT:    scratch_load_dword v38, off, s32 offset:20
+; GFX950-NEXT:    scratch_load_dword v51, off, s32 offset:8
+; GFX950-NEXT:    scratch_load_dword v50, off, s32 offset:4
+; GFX950-NEXT:    scratch_load_dword v39, off, s32 offset:16
+; GFX950-NEXT:    scratch_load_dword v38, off, s32 offset:12
+; GFX950-NEXT:    scratch_load_dword v35, off, s32 offset:24
+; GFX950-NEXT:    scratch_load_dword v34, off, s32 offset:20
 ; GFX950-NEXT:    scratch_load_dword v57, off, s32 offset:32
 ; GFX950-NEXT:    scratch_load_dword v56, off, s32 offset:28
 ; GFX950-NEXT:    scratch_load_dword v47, off, s32 offset:40
@@ -2395,270 +3130,375 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX950-NEXT:    scratch_load_dword v54, off, s32 offset:68
 ; GFX950-NEXT:    scratch_load_dword v53, off, s32 offset:80
 ; GFX950-NEXT:    scratch_load_dword v52, off, s32 offset:76
-; GFX950-NEXT:    scratch_load_dword v51, off, s32 offset:88
-; GFX950-NEXT:    scratch_load_dword v50, off, s32 offset:84
-; GFX950-NEXT:    scratch_load_dword v49, off, s32 offset:96
-; GFX950-NEXT:    scratch_load_dword v48, off, s32 offset:92
-; GFX950-NEXT:    scratch_load_dword v31, off, s32
 ; GFX950-NEXT:    scratch_load_dword v33, off, s32 offset:104
 ; GFX950-NEXT:    scratch_load_dword v32, off, s32 offset:100
+; GFX950-NEXT:    scratch_load_dword v37, off, s32 offset:96
+; GFX950-NEXT:    scratch_load_dword v49, off, s32 offset:88
+; GFX950-NEXT:    scratch_load_dword v48, off, s32 offset:84
+; GFX950-NEXT:    scratch_load_dword v36, off, s32 offset:92
 ; GFX950-NEXT:    v_accvgpr_write_b32 a10, v58 ; Reload Reuse
 ; GFX950-NEXT:    v_accvgpr_write_b32 a11, v59 ; Reload Reuse
 ; GFX950-NEXT:    v_accvgpr_write_b32 a12, v60 ; Reload Reuse
 ; GFX950-NEXT:    v_accvgpr_write_b32 a13, v61 ; Reload Reuse
-; GFX950-NEXT:    v_accvgpr_write_b32 a14, v62 ; Reload Reuse
-; GFX950-NEXT:    v_accvgpr_write_b32 a15, v63 ; Reload Reuse
-; GFX950-NEXT:    s_waitcnt vmcnt(25)
-; GFX950-NEXT:    v_max_f64 v[58:59], v[0:1], v[34:35]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[34:35]
-; GFX950-NEXT:    scratch_load_dword v35, off, s32 offset:112
-; GFX950-NEXT:    scratch_load_dword v34, off, s32 offset:108
-; GFX950-NEXT:    s_waitcnt vmcnt(25)
-; GFX950-NEXT:    v_max_f64 v[60:61], v[2:3], v[36:37]
-; GFX950-NEXT:    v_cmp_u_f64_e64 s[0:1], v[2:3], v[36:37]
-; GFX950-NEXT:    scratch_load_dword v37, off, s32 offset:120
-; GFX950-NEXT:    scratch_load_dword v36, off, s32 offset:116
-; GFX950-NEXT:    s_waitcnt vmcnt(25)
-; GFX950-NEXT:    v_max_f64 v[62:63], v[4:5], v[38:39]
-; GFX950-NEXT:    v_cmp_u_f64_e64 s[2:3], v[4:5], v[38:39]
-; GFX950-NEXT:    scratch_load_dword v39, off, s32 offset:128
-; GFX950-NEXT:    scratch_load_dword v38, off, s32 offset:124
-; GFX950-NEXT:    v_mov_b32_e32 v2, 0x7ff80000
+; GFX950-NEXT:    s_waitcnt vmcnt(24)
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[50:51], v[50:51]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v51, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v50, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX950-NEXT:    s_waitcnt vmcnt(22)
+; GFX950-NEXT:    v_cmp_u_f64_e64 s[0:1], v[38:39], v[38:39]
+; GFX950-NEXT:    s_waitcnt vmcnt(20)
+; GFX950-NEXT:    v_cmp_u_f64_e64 s[2:3], v[34:35], v[34:35]
+; GFX950-NEXT:    v_cndmask_b32_e32 v59, v51, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v58, v50, v0, vcc
+; GFX950-NEXT:    scratch_load_dword v51, off, s32 offset:112
+; GFX950-NEXT:    scratch_load_dword v50, off, s32 offset:108
+; GFX950-NEXT:    v_cndmask_b32_e64 v3, v3, v39, s[0:1]
+; GFX950-NEXT:    v_cndmask_b32_e64 v2, v2, v38, s[0:1]
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX950-NEXT:    v_cndmask_b32_e64 v5, v5, v35, s[2:3]
+; GFX950-NEXT:    v_cndmask_b32_e64 v4, v4, v34, s[2:3]
+; GFX950-NEXT:    v_cndmask_b32_e32 v39, v39, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v38, v38, v2, vcc
+; GFX950-NEXT:    v_max_f64 v[2:3], v[2:3], v[38:39]
+; GFX950-NEXT:    scratch_load_dword v39, off, s32 offset:120
+; GFX950-NEXT:    scratch_load_dword v38, off, s32 offset:116
+; GFX950-NEXT:    scratch_load_dword v31, off, s32
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX950-NEXT:    v_max_f64 v[0:1], v[0:1], v[58:59]
+; GFX950-NEXT:    v_accvgpr_read_b32 v59, a11 ; Reload Reuse
+; GFX950-NEXT:    v_cndmask_b32_e32 v61, v35, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v60, v34, v4, vcc
+; GFX950-NEXT:    scratch_load_dword v35, off, s32 offset:128
+; GFX950-NEXT:    scratch_load_dword v34, off, s32 offset:124
 ; GFX950-NEXT:    s_waitcnt vmcnt(25)
-; GFX950-NEXT:    v_max_f64 v[0:1], v[6:7], v[56:57]
-; GFX950-NEXT:    v_cmp_u_f64_e64 s[4:5], v[6:7], v[56:57]
-; GFX950-NEXT:    s_waitcnt vmcnt(23)
-; GFX950-NEXT:    v_max_f64 v[56:57], v[8:9], v[46:47]
-; GFX950-NEXT:    v_cndmask_b32_e64 v58, v58, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v59, v59, v2, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[46:47]
-; GFX950-NEXT:    v_cndmask_b32_e64 v6, v0, 0, s[4:5]
-; GFX950-NEXT:    v_cndmask_b32_e64 v7, v1, v2, s[4:5]
-; GFX950-NEXT:    v_cndmask_b32_e64 v8, v56, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v9, v57, v2, vcc
-; GFX950-NEXT:    s_waitcnt vmcnt(21)
-; GFX950-NEXT:    v_max_f64 v[0:1], v[10:11], v[44:45]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[44:45]
-; GFX950-NEXT:    v_cndmask_b32_e64 v60, v60, 0, s[0:1]
-; GFX950-NEXT:    v_cndmask_b32_e64 v3, v61, v2, s[0:1]
-; GFX950-NEXT:    v_cndmask_b32_e64 v10, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v11, v1, v2, vcc
-; GFX950-NEXT:    s_waitcnt vmcnt(19)
-; GFX950-NEXT:    v_max_f64 v[0:1], v[12:13], v[42:43]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[42:43]
-; GFX950-NEXT:    v_cndmask_b32_e64 v4, v62, 0, s[2:3]
-; GFX950-NEXT:    v_cndmask_b32_e64 v5, v63, v2, s[2:3]
-; GFX950-NEXT:    v_cndmask_b32_e64 v12, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v13, v1, v2, vcc
-; GFX950-NEXT:    s_waitcnt vmcnt(17)
-; GFX950-NEXT:    v_max_f64 v[0:1], v[14:15], v[40:41]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
-; GFX950-NEXT:    v_accvgpr_read_b32 v63, a15 ; Reload Reuse
-; GFX950-NEXT:    v_accvgpr_read_b32 v62, a14 ; Reload Reuse
-; GFX950-NEXT:    v_cndmask_b32_e64 v14, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v15, v1, v2, vcc
-; GFX950-NEXT:    s_waitcnt vmcnt(15)
-; GFX950-NEXT:    v_max_f64 v[0:1], v[16:17], v[54:55]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[54:55]
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[56:57], v[56:57]
+; GFX950-NEXT:    v_max_f64 v[4:5], v[4:5], v[60:61]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v61, a13 ; Reload Reuse
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v7, v57, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, v6, v56, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX950-NEXT:    v_accvgpr_read_b32 v60, a12 ; Reload Reuse
+; GFX950-NEXT:    v_accvgpr_read_b32 v58, a10 ; Reload Reuse
+; GFX950-NEXT:    v_cndmask_b32_e32 v57, v57, v7, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v56, v56, v6, vcc
+; GFX950-NEXT:    s_waitcnt vmcnt(23)
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[46:47], v[46:47]
+; GFX950-NEXT:    v_max_f64 v[6:7], v[6:7], v[56:57]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v57, a9 ; Reload Reuse
-; GFX950-NEXT:    v_cndmask_b32_e64 v16, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v17, v1, v2, vcc
-; GFX950-NEXT:    s_waitcnt vmcnt(13)
-; GFX950-NEXT:    v_max_f64 v[0:1], v[18:19], v[52:53]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[52:53]
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v9, v47, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v8, v46, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v56, a8 ; Reload Reuse
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v47, v47, v9, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v46, v46, v8, vcc
+; GFX950-NEXT:    s_waitcnt vmcnt(21)
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[44:45], v[44:45]
+; GFX950-NEXT:    v_max_f64 v[8:9], v[8:9], v[46:47]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v47, a7 ; Reload Reuse
-; GFX950-NEXT:    v_cndmask_b32_e64 v18, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v19, v1, v2, vcc
-; GFX950-NEXT:    s_waitcnt vmcnt(11)
-; GFX950-NEXT:    v_max_f64 v[0:1], v[20:21], v[50:51]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[50:51]
+; GFX950-NEXT:    v_cndmask_b32_e32 v11, v11, v45, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v10, v10, v44, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v46, a6 ; Reload Reuse
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v45, v45, v11, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v44, v44, v10, vcc
+; GFX950-NEXT:    s_waitcnt vmcnt(19)
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[42:43], v[42:43]
+; GFX950-NEXT:    v_max_f64 v[10:11], v[10:11], v[44:45]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v45, a5 ; Reload Reuse
-; GFX950-NEXT:    v_cndmask_b32_e64 v20, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v21, v1, v2, vcc
-; GFX950-NEXT:    s_waitcnt vmcnt(9)
-; GFX950-NEXT:    v_max_f64 v[0:1], v[22:23], v[48:49]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[48:49]
+; GFX950-NEXT:    v_cndmask_b32_e32 v13, v13, v43, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v12, v12, v42, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v44, a4 ; Reload Reuse
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v43, v43, v13, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v42, v42, v12, vcc
+; GFX950-NEXT:    s_waitcnt vmcnt(17)
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[40:41], v[40:41]
+; GFX950-NEXT:    v_max_f64 v[12:13], v[12:13], v[42:43]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v43, a3 ; Reload Reuse
-; GFX950-NEXT:    v_cndmask_b32_e64 v22, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v23, v1, v2, vcc
-; GFX950-NEXT:    s_waitcnt vmcnt(6)
-; GFX950-NEXT:    v_max_f64 v[0:1], v[24:25], v[32:33]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[32:33]
+; GFX950-NEXT:    v_cndmask_b32_e32 v15, v15, v41, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v14, v14, v40, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v42, a2 ; Reload Reuse
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v41, v41, v15, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v40, v40, v14, vcc
+; GFX950-NEXT:    s_waitcnt vmcnt(15)
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[54:55], v[54:55]
+; GFX950-NEXT:    v_max_f64 v[14:15], v[14:15], v[40:41]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v41, a1 ; Reload Reuse
-; GFX950-NEXT:    v_cndmask_b32_e64 v24, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v25, v1, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v17, v17, v55, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v16, v16, v54, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[16:17]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v40, a0 ; Reload Reuse
-; GFX950-NEXT:    s_waitcnt vmcnt(4)
-; GFX950-NEXT:    v_max_f64 v[0:1], v[26:27], v[34:35]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[34:35]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v55, v55, v17, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v54, v54, v16, vcc
+; GFX950-NEXT:    s_waitcnt vmcnt(13)
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[52:53], v[52:53]
+; GFX950-NEXT:    v_max_f64 v[16:17], v[16:17], v[54:55]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v19, v19, v53, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v18, v18, v52, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[18:19]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v53, v53, v19, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v52, v52, v18, vcc
+; GFX950-NEXT:    s_waitcnt vmcnt(8)
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[48:49], v[48:49]
+; GFX950-NEXT:    v_max_f64 v[18:19], v[18:19], v[52:53]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v21, v21, v49, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v20, v20, v48, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[20:21]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v49, v49, v21, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v48, v48, v20, vcc
+; GFX950-NEXT:    s_waitcnt vmcnt(7)
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[36:37], v[36:37]
+; GFX950-NEXT:    v_max_f64 v[20:21], v[20:21], v[48:49]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v23, v23, v37, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v22, v22, v36, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[22:23]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v37, v37, v23, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v36, v36, v22, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[32:33], v[32:33]
+; GFX950-NEXT:    v_max_f64 v[22:23], v[22:23], v[36:37]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v25, v25, v33, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v24, v24, v32, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[24:25]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v33, v33, v25, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v32, v32, v24, vcc
+; GFX950-NEXT:    s_waitcnt vmcnt(5)
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[50:51], v[50:51]
+; GFX950-NEXT:    v_max_f64 v[24:25], v[24:25], v[32:33]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v27, v27, v51, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v26, v26, v50, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[26:27]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v26, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v27, v1, v2, vcc
-; GFX950-NEXT:    s_waitcnt vmcnt(2)
-; GFX950-NEXT:    v_max_f64 v[0:1], v[28:29], v[36:37]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[36:37]
+; GFX950-NEXT:    v_cndmask_b32_e32 v33, v51, v27, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v32, v50, v26, vcc
+; GFX950-NEXT:    s_waitcnt vmcnt(3)
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[38:39], v[38:39]
+; GFX950-NEXT:    v_max_f64 v[26:27], v[26:27], v[32:33]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v29, v29, v39, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v28, v28, v38, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[28:29]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v28, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v29, v1, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v33, v39, v29, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v32, v38, v28, vcc
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
-; GFX950-NEXT:    v_max_f64 v[0:1], v[30:31], v[38:39]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[30:31], v[38:39]
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[34:35], v[34:35]
+; GFX950-NEXT:    v_max_f64 v[28:29], v[28:29], v[32:33]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v30, v30, v34, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v31, v31, v35, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[30:31], v[30:31]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v30, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v31, v1, v2, vcc
-; GFX950-NEXT:    v_mov_b32_e32 v0, v58
-; GFX950-NEXT:    v_mov_b32_e32 v1, v59
-; GFX950-NEXT:    v_mov_b32_e32 v2, v60
-; GFX950-NEXT:    v_accvgpr_read_b32 v60, a12 ; Reload Reuse
-; GFX950-NEXT:    v_accvgpr_read_b32 v59, a11 ; Reload Reuse
-; GFX950-NEXT:    v_accvgpr_read_b32 v58, a10 ; Reload Reuse
+; GFX950-NEXT:    v_cndmask_b32_e32 v33, v35, v31, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v32, v34, v30, vcc
+; GFX950-NEXT:    v_max_f64 v[30:31], v[30:31], v[32:33]
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v16f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    s_clause 0x18
-; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4
+; GFX10-NEXT:    s_clause 0x1c
 ; GFX10-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:8
+; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4
 ; GFX10-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:16
 ; GFX10-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:12
 ; GFX10-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:24
 ; GFX10-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:20
-; GFX10-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:36
-; GFX10-NEXT:    buffer_load_dword v49, off, s[0:3], s32 offset:32
-; GFX10-NEXT:    buffer_load_dword v48, off, s[0:3], s32 offset:28
-; GFX10-NEXT:    buffer_load_dword v50, off, s[0:3], s32 offset:68
-; GFX10-NEXT:    buffer_load_dword v53, off, s[0:3], s32 offset:64
-; GFX10-NEXT:    buffer_load_dword v52, off, s[0:3], s32 offset:60
-; GFX10-NEXT:    buffer_load_dword v55, off, s[0:3], s32 offset:56
-; GFX10-NEXT:    buffer_load_dword v54, off, s[0:3], s32 offset:52
-; GFX10-NEXT:    buffer_load_dword v65, off, s[0:3], s32 offset:48
-; GFX10-NEXT:    buffer_load_dword v64, off, s[0:3], s32 offset:44
-; GFX10-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:40
-; GFX10-NEXT:    buffer_load_dword v66, off, s[0:3], s32 offset:100
-; GFX10-NEXT:    buffer_load_dword v69, off, s[0:3], s32 offset:96
-; GFX10-NEXT:    buffer_load_dword v68, off, s[0:3], s32 offset:92
-; GFX10-NEXT:    buffer_load_dword v71, off, s[0:3], s32 offset:88
-; GFX10-NEXT:    buffer_load_dword v70, off, s[0:3], s32 offset:84
-; GFX10-NEXT:    buffer_load_dword v81, off, s[0:3], s32 offset:80
-; GFX10-NEXT:    buffer_load_dword v80, off, s[0:3], s32 offset:76
-; GFX10-NEXT:    buffer_load_dword v51, off, s[0:3], s32 offset:72
+; GFX10-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:32
+; GFX10-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:28
+; GFX10-NEXT:    buffer_load_dword v49, off, s[0:3], s32 offset:40
+; GFX10-NEXT:    buffer_load_dword v48, off, s[0:3], s32 offset:36
+; GFX10-NEXT:    buffer_load_dword v51, off, s[0:3], s32 offset:48
+; GFX10-NEXT:    buffer_load_dword v50, off, s[0:3], s32 offset:44
+; GFX10-NEXT:    buffer_load_dword v53, off, s[0:3], s32 offset:56
+; GFX10-NEXT:    buffer_load_dword v52, off, s[0:3], s32 offset:52
+; GFX10-NEXT:    buffer_load_dword v55, off, s[0:3], s32 offset:64
+; GFX10-NEXT:    buffer_load_dword v54, off, s[0:3], s32 offset:60
+; GFX10-NEXT:    buffer_load_dword v65, off, s[0:3], s32 offset:72
+; GFX10-NEXT:    buffer_load_dword v64, off, s[0:3], s32 offset:68
+; GFX10-NEXT:    buffer_load_dword v67, off, s[0:3], s32 offset:80
+; GFX10-NEXT:    buffer_load_dword v66, off, s[0:3], s32 offset:76
+; GFX10-NEXT:    buffer_load_dword v69, off, s[0:3], s32 offset:88
+; GFX10-NEXT:    buffer_load_dword v68, off, s[0:3], s32 offset:84
+; GFX10-NEXT:    buffer_load_dword v71, off, s[0:3], s32 offset:96
+; GFX10-NEXT:    buffer_load_dword v70, off, s[0:3], s32 offset:92
+; GFX10-NEXT:    buffer_load_dword v81, off, s[0:3], s32 offset:104
+; GFX10-NEXT:    buffer_load_dword v80, off, s[0:3], s32 offset:100
+; GFX10-NEXT:    buffer_load_dword v83, off, s[0:3], s32 offset:112
+; GFX10-NEXT:    buffer_load_dword v82, off, s[0:3], s32 offset:108
+; GFX10-NEXT:    buffer_load_dword v39, off, s[0:3], s32
+; GFX10-NEXT:    s_waitcnt vmcnt(27)
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[31:32], v[31:32]
+; GFX10-NEXT:    s_waitcnt vmcnt(25)
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[33:34], v[33:34]
 ; GFX10-NEXT:    s_waitcnt vmcnt(23)
-; GFX10-NEXT:    v_max_f64 v[82:83], v[0:1], v[31:32]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[31:32]
-; GFX10-NEXT:    s_waitcnt vmcnt(21)
-; GFX10-NEXT:    v_max_f64 v[84:85], v[2:3], v[33:34]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[33:34]
-; GFX10-NEXT:    s_waitcnt vmcnt(19)
-; GFX10-NEXT:    v_max_f64 v[32:33], v[4:5], v[35:36]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[35:36]
-; GFX10-NEXT:    s_clause 0x7
-; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:112
-; GFX10-NEXT:    buffer_load_dword v67, off, s[0:3], s32 offset:104
-; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:108
-; GFX10-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:120
-; GFX10-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:116
-; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX10-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:128
-; GFX10-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:124
-; GFX10-NEXT:    s_waitcnt vmcnt(24)
-; GFX10-NEXT:    v_max_f64 v[34:35], v[6:7], v[48:49]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[6:7], v[48:49]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[35:36], v[35:36]
 ; GFX10-NEXT:    s_waitcnt vmcnt(21)
-; GFX10-NEXT:    v_cmp_u_f64_e64 s10, v[14:15], v[52:53]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[37:38], v[37:38]
 ; GFX10-NEXT:    s_waitcnt vmcnt(19)
-; GFX10-NEXT:    v_cmp_u_f64_e64 s9, v[12:13], v[54:55]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s7, v[48:49], v[48:49]
 ; GFX10-NEXT:    s_waitcnt vmcnt(17)
-; GFX10-NEXT:    v_cmp_u_f64_e64 s8, v[10:11], v[64:65]
-; GFX10-NEXT:    s_waitcnt vmcnt(16)
-; GFX10-NEXT:    v_max_f64 v[48:49], v[8:9], v[37:38]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s7, v[8:9], v[37:38]
-; GFX10-NEXT:    v_max_f64 v[36:37], v[10:11], v[64:65]
-; GFX10-NEXT:    v_max_f64 v[38:39], v[12:13], v[54:55]
-; GFX10-NEXT:    v_max_f64 v[54:55], v[14:15], v[52:53]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s8, v[50:51], v[50:51]
+; GFX10-NEXT:    s_waitcnt vmcnt(15)
+; GFX10-NEXT:    v_cmp_u_f64_e64 s9, v[52:53], v[52:53]
+; GFX10-NEXT:    s_waitcnt vmcnt(13)
+; GFX10-NEXT:    v_cmp_u_f64_e64 s10, v[54:55], v[54:55]
 ; GFX10-NEXT:    s_waitcnt vmcnt(11)
-; GFX10-NEXT:    v_max_f64 v[64:65], v[20:21], v[70:71]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s13, v[20:21], v[70:71]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s11, v[64:65], v[64:65]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v32, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v31, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v34, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v33, s4
 ; GFX10-NEXT:    s_waitcnt vmcnt(9)
-; GFX10-NEXT:    v_cmp_u_f64_e64 s12, v[18:19], v[80:81]
-; GFX10-NEXT:    s_waitcnt vmcnt(8)
-; GFX10-NEXT:    v_max_f64 v[52:53], v[16:17], v[50:51]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s11, v[16:17], v[50:51]
-; GFX10-NEXT:    v_max_f64 v[50:51], v[18:19], v[80:81]
-; GFX10-NEXT:    v_max_f64 v[70:71], v[22:23], v[68:69]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s14, v[22:23], v[68:69]
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, v34, 0, s6
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, v35, 0x7ff80000, s6
-; GFX10-NEXT:    v_cndmask_b32_e64 v8, v48, 0, s7
-; GFX10-NEXT:    v_cndmask_b32_e64 v9, v49, 0x7ff80000, s7
-; GFX10-NEXT:    v_cndmask_b32_e64 v10, v36, 0, s8
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, v37, 0x7ff80000, s8
-; GFX10-NEXT:    v_cndmask_b32_e64 v12, v38, 0, s9
-; GFX10-NEXT:    v_cndmask_b32_e64 v13, v39, 0x7ff80000, s9
-; GFX10-NEXT:    v_cndmask_b32_e64 v14, v54, 0, s10
-; GFX10-NEXT:    v_cndmask_b32_e64 v15, v55, 0x7ff80000, s10
-; GFX10-NEXT:    v_cndmask_b32_e64 v16, v52, 0, s11
-; GFX10-NEXT:    v_cndmask_b32_e64 v17, v53, 0x7ff80000, s11
-; GFX10-NEXT:    v_cndmask_b32_e64 v18, v50, 0, s12
-; GFX10-NEXT:    v_cndmask_b32_e64 v19, v51, 0x7ff80000, s12
-; GFX10-NEXT:    v_cndmask_b32_e64 v20, v64, 0, s13
-; GFX10-NEXT:    v_cndmask_b32_e64 v21, v65, 0x7ff80000, s13
-; GFX10-NEXT:    v_cndmask_b32_e64 v22, v70, 0, s14
-; GFX10-NEXT:    v_cndmask_b32_e64 v23, v71, 0x7ff80000, s14
-; GFX10-NEXT:    s_waitcnt vmcnt(6)
-; GFX10-NEXT:    v_max_f64 v[68:69], v[24:25], v[66:67]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s15, v[24:25], v[66:67]
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[66:67], v[66:67]
+; GFX10-NEXT:    s_waitcnt vmcnt(7)
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[68:69], v[68:69]
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v36, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, v38, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v9, v9, v49, s7
+; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, v48, s7
+; GFX10-NEXT:    v_cmp_u_f64_e64 s7, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, v11, v51, s8
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, v10, v50, s8
+; GFX10-NEXT:    v_cmp_u_f64_e64 s8, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, v4, v35, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v37, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v13, v13, v53, s9
 ; GFX10-NEXT:    s_waitcnt vmcnt(5)
-; GFX10-NEXT:    v_max_f64 v[66:67], v[26:27], v[0:1]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s16, v[26:27], v[0:1]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[70:71], v[70:71]
 ; GFX10-NEXT:    s_waitcnt vmcnt(3)
-; GFX10-NEXT:    v_max_f64 v[80:81], v[28:29], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s17, v[28:29], v[2:3]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[80:81], v[80:81]
+; GFX10-NEXT:    v_cndmask_b32_e64 v12, v12, v52, s9
+; GFX10-NEXT:    v_cndmask_b32_e64 v15, v15, v55, s10
+; GFX10-NEXT:    v_cndmask_b32_e64 v17, v17, v65, s11
+; GFX10-NEXT:    v_cndmask_b32_e64 v14, v14, v54, s10
+; GFX10-NEXT:    v_cndmask_b32_e64 v16, v16, v64, s11
+; GFX10-NEXT:    v_cmp_u_f64_e64 s9, v[4:5], v[4:5]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s10, v[6:7], v[6:7]
+; GFX10-NEXT:    v_cndmask_b32_e32 v19, v19, v67, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v18, v18, v66, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[8:9], v[8:9]
+; GFX10-NEXT:    v_cndmask_b32_e64 v21, v21, v69, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v20, v20, v68, s4
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[12:13], v[12:13]
+; GFX10-NEXT:    v_cndmask_b32_e64 v32, v32, v1, s7
+; GFX10-NEXT:    v_cndmask_b32_e64 v31, v31, v0, s7
+; GFX10-NEXT:    v_cmp_u_f64_e64 s11, v[10:11], v[10:11]
+; GFX10-NEXT:    v_cndmask_b32_e64 v34, v34, v3, s8
+; GFX10-NEXT:    v_cndmask_b32_e64 v33, v33, v2, s8
+; GFX10-NEXT:    v_cmp_u_f64_e64 s12, v[18:19], v[18:19]
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[31:32]
+; GFX10-NEXT:    s_clause 0x1
+; GFX10-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:120
+; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:116
+; GFX10-NEXT:    v_cndmask_b32_e64 v23, v23, v71, s5
+; GFX10-NEXT:    v_max_f64 v[2:3], v[2:3], v[33:34]
+; GFX10-NEXT:    s_clause 0x1
+; GFX10-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:128
+; GFX10-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:124
+; GFX10-NEXT:    v_cndmask_b32_e64 v25, v25, v81, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v22, v22, v70, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v24, v24, v80, s6
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[14:15], v[14:15]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[16:17], v[16:17]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s13, v[20:21], v[20:21]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s7, v[22:23], v[22:23]
+; GFX10-NEXT:    v_cndmask_b32_e32 v49, v49, v9, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v48, v48, v8, vcc_lo
+; GFX10-NEXT:    s_waitcnt vmcnt(5)
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[82:83], v[82:83]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s14, v[24:25], v[24:25]
+; GFX10-NEXT:    v_cndmask_b32_e64 v53, v53, v13, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v52, v52, v12, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v36, v36, v5, s9
+; GFX10-NEXT:    v_cndmask_b32_e64 v35, v35, v4, s9
+; GFX10-NEXT:    v_cndmask_b32_e64 v38, v38, v7, s10
+; GFX10-NEXT:    v_cndmask_b32_e64 v37, v37, v6, s10
+; GFX10-NEXT:    v_cndmask_b32_e64 v51, v51, v11, s11
+; GFX10-NEXT:    v_cndmask_b32_e64 v50, v50, v10, s11
+; GFX10-NEXT:    v_cndmask_b32_e64 v67, v67, v19, s12
+; GFX10-NEXT:    v_cndmask_b32_e64 v66, v66, v18, s12
+; GFX10-NEXT:    v_max_f64 v[4:5], v[4:5], v[35:36]
+; GFX10-NEXT:    v_max_f64 v[6:7], v[6:7], v[37:38]
+; GFX10-NEXT:    v_max_f64 v[8:9], v[8:9], v[48:49]
+; GFX10-NEXT:    v_max_f64 v[10:11], v[10:11], v[50:51]
+; GFX10-NEXT:    v_cndmask_b32_e64 v55, v55, v15, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v54, v54, v14, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v65, v65, v17, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v64, v64, v16, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v69, v69, v21, s13
+; GFX10-NEXT:    v_cndmask_b32_e64 v68, v68, v20, s13
+; GFX10-NEXT:    v_cndmask_b32_e32 v27, v27, v83, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v26, v26, v82, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v71, v71, v23, s7
+; GFX10-NEXT:    v_cndmask_b32_e64 v70, v70, v22, s7
+; GFX10-NEXT:    v_cndmask_b32_e64 v81, v81, v25, s14
+; GFX10-NEXT:    v_cndmask_b32_e64 v80, v80, v24, s14
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[26:27], v[26:27]
+; GFX10-NEXT:    v_max_f64 v[12:13], v[12:13], v[52:53]
+; GFX10-NEXT:    v_max_f64 v[14:15], v[14:15], v[54:55]
+; GFX10-NEXT:    v_max_f64 v[16:17], v[16:17], v[64:65]
+; GFX10-NEXT:    v_max_f64 v[18:19], v[18:19], v[66:67]
+; GFX10-NEXT:    v_max_f64 v[20:21], v[20:21], v[68:69]
+; GFX10-NEXT:    v_max_f64 v[22:23], v[22:23], v[70:71]
+; GFX10-NEXT:    v_max_f64 v[24:25], v[24:25], v[80:81]
+; GFX10-NEXT:    v_cndmask_b32_e32 v83, v83, v27, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v82, v82, v26, vcc_lo
+; GFX10-NEXT:    v_max_f64 v[26:27], v[26:27], v[82:83]
+; GFX10-NEXT:    s_waitcnt vmcnt(2)
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[31:32], v[31:32]
+; GFX10-NEXT:    v_cndmask_b32_e32 v29, v29, v32, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v28, v28, v31, vcc_lo
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_max_f64 v[86:87], v[30:31], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s18, v[30:31], v[4:5]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v82, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v83, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v84, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v85, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v32, 0, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v33, 0x7ff80000, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v24, v68, 0, s15
-; GFX10-NEXT:    v_cndmask_b32_e64 v25, v69, 0x7ff80000, s15
-; GFX10-NEXT:    v_cndmask_b32_e64 v26, v66, 0, s16
-; GFX10-NEXT:    v_cndmask_b32_e64 v27, v67, 0x7ff80000, s16
-; GFX10-NEXT:    v_cndmask_b32_e64 v28, v80, 0, s17
-; GFX10-NEXT:    v_cndmask_b32_e64 v29, v81, 0x7ff80000, s17
-; GFX10-NEXT:    v_cndmask_b32_e64 v30, v86, 0, s18
-; GFX10-NEXT:    v_cndmask_b32_e64 v31, v87, 0x7ff80000, s18
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[33:34], v[33:34]
+; GFX10-NEXT:    v_cndmask_b32_e32 v84, v30, v33, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v85, v39, v34, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[28:29], v[28:29]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[84:85], v[84:85]
+; GFX10-NEXT:    v_cndmask_b32_e32 v32, v32, v29, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v31, v31, v28, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v34, v34, v85, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v33, v33, v84, s4
+; GFX10-NEXT:    v_max_f64 v[28:29], v[28:29], v[31:32]
+; GFX10-NEXT:    v_max_f64 v[30:31], v[84:85], v[33:34]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_v16f64:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    s_clause 0x1f
-; GFX11-NEXT:    scratch_load_b32 v31, off, s32
-; GFX11-NEXT:    scratch_load_b32 v33, off, s32 offset:8
-; GFX11-NEXT:    scratch_load_b32 v32, off, s32 offset:4
-; GFX11-NEXT:    scratch_load_b32 v35, off, s32 offset:16
-; GFX11-NEXT:    scratch_load_b32 v34, off, s32 offset:12
-; GFX11-NEXT:    scratch_load_b32 v37, off, s32 offset:24
-; GFX11-NEXT:    scratch_load_b32 v36, off, s32 offset:20
-; GFX11-NEXT:    scratch_load_b32 v39, off, s32 offset:32
-; GFX11-NEXT:    scratch_load_b32 v38, off, s32 offset:28
-; GFX11-NEXT:    scratch_load_b32 v49, off, s32 offset:40
-; GFX11-NEXT:    scratch_load_b32 v48, off, s32 offset:36
-; GFX11-NEXT:    scratch_load_b32 v51, off, s32 offset:48
-; GFX11-NEXT:    scratch_load_b32 v50, off, s32 offset:44
-; GFX11-NEXT:    scratch_load_b32 v53, off, s32 offset:56
-; GFX11-NEXT:    scratch_load_b32 v52, off, s32 offset:52
-; GFX11-NEXT:    scratch_load_b32 v55, off, s32 offset:64
-; GFX11-NEXT:    scratch_load_b32 v54, off, s32 offset:60
+; GFX11-NEXT:    scratch_load_b32 v32, off, s32 offset:64
+; GFX11-NEXT:    scratch_load_b32 v34, off, s32 offset:56
+; GFX11-NEXT:    scratch_load_b32 v36, off, s32 offset:48
+; GFX11-NEXT:    scratch_load_b32 v38, off, s32 offset:40
+; GFX11-NEXT:    scratch_load_b32 v49, off, s32 offset:32
+; GFX11-NEXT:    scratch_load_b32 v51, off, s32 offset:24
+; GFX11-NEXT:    scratch_load_b32 v53, off, s32 offset:16
+; GFX11-NEXT:    scratch_load_b32 v55, off, s32 offset:8
+; GFX11-NEXT:    scratch_load_b32 v54, off, s32 offset:4
+; GFX11-NEXT:    scratch_load_b32 v52, off, s32 offset:12
+; GFX11-NEXT:    scratch_load_b32 v50, off, s32 offset:20
+; GFX11-NEXT:    scratch_load_b32 v48, off, s32 offset:28
+; GFX11-NEXT:    scratch_load_b32 v37, off, s32 offset:36
+; GFX11-NEXT:    scratch_load_b32 v35, off, s32 offset:44
+; GFX11-NEXT:    scratch_load_b32 v33, off, s32 offset:52
+; GFX11-NEXT:    scratch_load_b32 v31, off, s32 offset:60
 ; GFX11-NEXT:    scratch_load_b32 v65, off, s32 offset:72
 ; GFX11-NEXT:    scratch_load_b32 v64, off, s32 offset:68
 ; GFX11-NEXT:    scratch_load_b32 v67, off, s32 offset:80
@@ -2675,86 +3515,135 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX11-NEXT:    scratch_load_b32 v84, off, s32 offset:116
 ; GFX11-NEXT:    scratch_load_b32 v87, off, s32 offset:128
 ; GFX11-NEXT:    scratch_load_b32 v86, off, s32 offset:124
-; GFX11-NEXT:    s_waitcnt vmcnt(30)
-; GFX11-NEXT:    v_max_f64 v[96:97], v[0:1], v[32:33]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[32:33]
-; GFX11-NEXT:    s_waitcnt vmcnt(28)
-; GFX11-NEXT:    v_max_f64 v[32:33], v[2:3], v[34:35]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[34:35]
-; GFX11-NEXT:    s_waitcnt vmcnt(26)
-; GFX11-NEXT:    v_max_f64 v[34:35], v[4:5], v[36:37]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[36:37]
+; GFX11-NEXT:    scratch_load_b32 v39, off, s32
 ; GFX11-NEXT:    s_waitcnt vmcnt(24)
-; GFX11-NEXT:    v_max_f64 v[36:37], v[6:7], v[38:39]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[6:7], v[38:39]
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[54:55], v[54:55]
+; GFX11-NEXT:    s_waitcnt vmcnt(23)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[52:53], v[52:53]
 ; GFX11-NEXT:    s_waitcnt vmcnt(22)
-; GFX11-NEXT:    v_max_f64 v[38:39], v[8:9], v[48:49]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s3, v[8:9], v[48:49]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[50:51], v[50:51]
+; GFX11-NEXT:    s_waitcnt vmcnt(21)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[48:49], v[48:49]
 ; GFX11-NEXT:    s_waitcnt vmcnt(20)
-; GFX11-NEXT:    v_max_f64 v[48:49], v[10:11], v[50:51]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s4, v[10:11], v[50:51]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s3, v[37:38], v[37:38]
+; GFX11-NEXT:    s_waitcnt vmcnt(19)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s4, v[35:36], v[35:36]
 ; GFX11-NEXT:    s_waitcnt vmcnt(18)
-; GFX11-NEXT:    v_max_f64 v[50:51], v[12:13], v[52:53]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s5, v[12:13], v[52:53]
-; GFX11-NEXT:    s_waitcnt vmcnt(16)
-; GFX11-NEXT:    v_max_f64 v[52:53], v[14:15], v[54:55]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s6, v[14:15], v[54:55]
-; GFX11-NEXT:    s_waitcnt vmcnt(14)
-; GFX11-NEXT:    v_max_f64 v[54:55], v[16:17], v[64:65]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s7, v[16:17], v[64:65]
-; GFX11-NEXT:    s_waitcnt vmcnt(12)
-; GFX11-NEXT:    v_max_f64 v[64:65], v[18:19], v[66:67]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s8, v[18:19], v[66:67]
-; GFX11-NEXT:    s_waitcnt vmcnt(10)
-; GFX11-NEXT:    v_max_f64 v[66:67], v[20:21], v[68:69]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s9, v[20:21], v[68:69]
-; GFX11-NEXT:    s_waitcnt vmcnt(8)
-; GFX11-NEXT:    v_max_f64 v[68:69], v[22:23], v[70:71]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s10, v[22:23], v[70:71]
-; GFX11-NEXT:    s_waitcnt vmcnt(6)
-; GFX11-NEXT:    v_max_f64 v[70:71], v[24:25], v[80:81]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s11, v[24:25], v[80:81]
-; GFX11-NEXT:    s_waitcnt vmcnt(4)
-; GFX11-NEXT:    v_max_f64 v[80:81], v[26:27], v[82:83]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s12, v[26:27], v[82:83]
-; GFX11-NEXT:    s_waitcnt vmcnt(2)
-; GFX11-NEXT:    v_max_f64 v[82:83], v[28:29], v[84:85]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s13, v[28:29], v[84:85]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s5, v[33:34], v[33:34]
+; GFX11-NEXT:    s_waitcnt vmcnt(17)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s6, v[31:32], v[31:32]
+; GFX11-NEXT:    s_waitcnt vmcnt(15)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s7, v[64:65], v[64:65]
+; GFX11-NEXT:    s_waitcnt vmcnt(13)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s8, v[66:67], v[66:67]
+; GFX11-NEXT:    s_waitcnt vmcnt(11)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s9, v[68:69], v[68:69]
+; GFX11-NEXT:    s_waitcnt vmcnt(9)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s10, v[70:71], v[70:71]
+; GFX11-NEXT:    s_waitcnt vmcnt(7)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s11, v[80:81], v[80:81]
+; GFX11-NEXT:    s_waitcnt vmcnt(5)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s12, v[82:83], v[82:83]
+; GFX11-NEXT:    s_waitcnt vmcnt(3)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s13, v[84:85], v[84:85]
+; GFX11-NEXT:    s_waitcnt vmcnt(1)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s14, v[86:87], v[86:87]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v55 :: v_dual_cndmask_b32 v0, v0, v54
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, v53, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, v5, v51, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v7, v7, v49, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v9, v9, v38, s3
+; GFX11-NEXT:    v_cndmask_b32_e64 v11, v11, v36, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v13, v13, v34, s5
+; GFX11-NEXT:    v_cndmask_b32_e64 v15, v15, v32, s6
+; GFX11-NEXT:    v_cndmask_b32_e64 v17, v17, v65, s7
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, v52, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v19, v19, v67, s8
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, v4, v50, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v21, v21, v69, s9
+; GFX11-NEXT:    v_cndmask_b32_e64 v6, v6, v48, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v23, v23, v71, s10
+; GFX11-NEXT:    v_cndmask_b32_e64 v8, v8, v37, s3
+; GFX11-NEXT:    v_cndmask_b32_e64 v25, v25, v81, s11
+; GFX11-NEXT:    v_cndmask_b32_e64 v10, v10, v35, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v27, v27, v83, s12
+; GFX11-NEXT:    v_cndmask_b32_e64 v12, v12, v33, s5
+; GFX11-NEXT:    v_cndmask_b32_e64 v29, v29, v85, s13
+; GFX11-NEXT:    v_cndmask_b32_e64 v14, v14, v31, s6
+; GFX11-NEXT:    v_cndmask_b32_e64 v96, v30, v86, s14
+; GFX11-NEXT:    v_cndmask_b32_e64 v16, v16, v64, s7
+; GFX11-NEXT:    v_cndmask_b32_e64 v18, v18, v66, s8
+; GFX11-NEXT:    v_cndmask_b32_e64 v20, v20, v68, s9
+; GFX11-NEXT:    v_cndmask_b32_e64 v22, v22, v70, s10
+; GFX11-NEXT:    v_cndmask_b32_e64 v24, v24, v80, s11
+; GFX11-NEXT:    v_cndmask_b32_e64 v26, v26, v82, s12
+; GFX11-NEXT:    v_cndmask_b32_e64 v28, v28, v84, s13
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_max_f64 v[84:85], v[30:31], v[86:87]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s14, v[30:31], v[86:87]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v96, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v97, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v32, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v33, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v34, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, v35, 0x7ff80000, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v6, v36, 0, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v7, v37, 0x7ff80000, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v8, v38, 0, s3
-; GFX11-NEXT:    v_cndmask_b32_e64 v9, v39, 0x7ff80000, s3
-; GFX11-NEXT:    v_cndmask_b32_e64 v10, v48, 0, s4
-; GFX11-NEXT:    v_cndmask_b32_e64 v11, v49, 0x7ff80000, s4
-; GFX11-NEXT:    v_cndmask_b32_e64 v12, v50, 0, s5
-; GFX11-NEXT:    v_cndmask_b32_e64 v13, v51, 0x7ff80000, s5
-; GFX11-NEXT:    v_cndmask_b32_e64 v14, v52, 0, s6
-; GFX11-NEXT:    v_cndmask_b32_e64 v15, v53, 0x7ff80000, s6
-; GFX11-NEXT:    v_cndmask_b32_e64 v16, v54, 0, s7
-; GFX11-NEXT:    v_cndmask_b32_e64 v17, v55, 0x7ff80000, s7
-; GFX11-NEXT:    v_cndmask_b32_e64 v18, v64, 0, s8
-; GFX11-NEXT:    v_cndmask_b32_e64 v19, v65, 0x7ff80000, s8
-; GFX11-NEXT:    v_cndmask_b32_e64 v20, v66, 0, s9
-; GFX11-NEXT:    v_cndmask_b32_e64 v21, v67, 0x7ff80000, s9
-; GFX11-NEXT:    v_cndmask_b32_e64 v22, v68, 0, s10
-; GFX11-NEXT:    v_cndmask_b32_e64 v23, v69, 0x7ff80000, s10
-; GFX11-NEXT:    v_cndmask_b32_e64 v24, v70, 0, s11
-; GFX11-NEXT:    v_cndmask_b32_e64 v25, v71, 0x7ff80000, s11
-; GFX11-NEXT:    v_cndmask_b32_e64 v26, v80, 0, s12
-; GFX11-NEXT:    v_cndmask_b32_e64 v27, v81, 0x7ff80000, s12
-; GFX11-NEXT:    v_cndmask_b32_e64 v28, v82, 0, s13
-; GFX11-NEXT:    v_cndmask_b32_e64 v29, v83, 0x7ff80000, s13
-; GFX11-NEXT:    v_cndmask_b32_e64 v30, v84, 0, s14
-; GFX11-NEXT:    v_cndmask_b32_e64 v31, v85, 0x7ff80000, s14
+; GFX11-NEXT:    v_cndmask_b32_e64 v97, v39, v87, s14
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[2:3]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[4:5]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[6:7], v[6:7]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s3, v[8:9], v[8:9]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s4, v[10:11], v[10:11]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s5, v[12:13], v[12:13]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s6, v[14:15], v[14:15]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s7, v[16:17], v[16:17]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s8, v[18:19], v[18:19]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s9, v[20:21], v[20:21]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s10, v[22:23], v[22:23]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s11, v[24:25], v[24:25]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s12, v[26:27], v[26:27]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s13, v[28:29], v[28:29]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s14, v[96:97], v[96:97]
+; GFX11-NEXT:    v_cndmask_b32_e32 v39, v55, v1, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v53, v53, v3, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v51, v51, v5, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v49, v49, v7, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v55, v38, v9, s3
+; GFX11-NEXT:    v_cndmask_b32_e64 v36, v36, v11, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v34, v34, v13, s5
+; GFX11-NEXT:    v_cndmask_b32_e64 v32, v32, v15, s6
+; GFX11-NEXT:    v_cndmask_b32_e64 v65, v65, v17, s7
+; GFX11-NEXT:    v_cndmask_b32_e64 v67, v67, v19, s8
+; GFX11-NEXT:    v_cndmask_b32_e64 v69, v69, v21, s9
+; GFX11-NEXT:    v_cndmask_b32_e64 v71, v71, v23, s10
+; GFX11-NEXT:    v_cndmask_b32_e64 v81, v81, v25, s11
+; GFX11-NEXT:    v_cndmask_b32_e64 v83, v83, v27, s12
+; GFX11-NEXT:    v_cndmask_b32_e32 v38, v54, v0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v85, v85, v29, s13
+; GFX11-NEXT:    v_cndmask_b32_e64 v52, v52, v2, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v87, v87, v97, s14
+; GFX11-NEXT:    v_cndmask_b32_e64 v50, v50, v4, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v48, v48, v6, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v54, v37, v8, s3
+; GFX11-NEXT:    v_cndmask_b32_e64 v35, v35, v10, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v33, v33, v12, s5
+; GFX11-NEXT:    v_cndmask_b32_e64 v31, v31, v14, s6
+; GFX11-NEXT:    v_cndmask_b32_e64 v64, v64, v16, s7
+; GFX11-NEXT:    v_cndmask_b32_e64 v66, v66, v18, s8
+; GFX11-NEXT:    v_cndmask_b32_e64 v68, v68, v20, s9
+; GFX11-NEXT:    v_cndmask_b32_e64 v70, v70, v22, s10
+; GFX11-NEXT:    v_cndmask_b32_e64 v80, v80, v24, s11
+; GFX11-NEXT:    v_cndmask_b32_e64 v82, v82, v26, s12
+; GFX11-NEXT:    v_cndmask_b32_e64 v84, v84, v28, s13
+; GFX11-NEXT:    v_cndmask_b32_e64 v86, v86, v96, s14
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[38:39]
+; GFX11-NEXT:    v_max_f64 v[2:3], v[2:3], v[52:53]
+; GFX11-NEXT:    v_max_f64 v[4:5], v[4:5], v[50:51]
+; GFX11-NEXT:    v_max_f64 v[6:7], v[6:7], v[48:49]
+; GFX11-NEXT:    v_max_f64 v[8:9], v[8:9], v[54:55]
+; GFX11-NEXT:    v_max_f64 v[10:11], v[10:11], v[35:36]
+; GFX11-NEXT:    v_max_f64 v[12:13], v[12:13], v[33:34]
+; GFX11-NEXT:    v_max_f64 v[14:15], v[14:15], v[31:32]
+; GFX11-NEXT:    v_max_f64 v[16:17], v[16:17], v[64:65]
+; GFX11-NEXT:    v_max_f64 v[18:19], v[18:19], v[66:67]
+; GFX11-NEXT:    v_max_f64 v[20:21], v[20:21], v[68:69]
+; GFX11-NEXT:    v_max_f64 v[22:23], v[22:23], v[70:71]
+; GFX11-NEXT:    v_max_f64 v[24:25], v[24:25], v[80:81]
+; GFX11-NEXT:    v_max_f64 v[26:27], v[26:27], v[82:83]
+; GFX11-NEXT:    v_max_f64 v[28:29], v[28:29], v[84:85]
+; GFX11-NEXT:    v_max_f64 v[30:31], v[96:97], v[86:87]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v16f64:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
index b5dab396f0bf1..743df5cbc4feb 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
@@ -14,19 +14,21 @@ define half @v_minimum_f16(half %src0, half %src1) {
 ; GFX8-LABEL: v_minimum_f16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_f16:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_f16:
@@ -38,27 +40,33 @@ define half @v_minimum_f16(half %src0, half %src1) {
 ; GFX10-LABEL: v_minimum_f16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: v_minimum_f16:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
-; GFX11-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v1.l
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v0.h
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_minimum_f16:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-TRUE16-LABEL: v_minimum_f16:
@@ -142,19 +150,21 @@ define half @v_minimum_f16__nsz(half %src0, half %src1) {
 ; GFX8-LABEL: v_minimum_f16__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_f16__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_f16__nsz:
@@ -166,27 +176,33 @@ define half @v_minimum_f16__nsz(half %src0, half %src1) {
 ; GFX10-LABEL: v_minimum_f16__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: v_minimum_f16__nsz:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
-; GFX11-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v1.l
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v0.h
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_minimum_f16__nsz:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-TRUE16-LABEL: v_minimum_f16__nsz:
@@ -271,20 +287,22 @@ define half @v_minimum_f16__nnan_src0(half %arg0, half %src1) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_add_f16_e32 v0, 1.0, v0
-; GFX8-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_f16__nnan_src0:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_add_f16_e32 v0, 1.0, v0
-; GFX900-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_f16__nnan_src0:
@@ -298,29 +316,37 @@ define half @v_minimum_f16__nnan_src0(half %arg0, half %src1) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_add_f16_e32 v0, 1.0, v0
-; GFX10-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: v_minimum_f16__nnan_src0:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.l, 1.0, v0.l
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
-; GFX11-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v1.l
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v0.h
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_minimum_f16__nnan_src0:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, 1.0, v0
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-FAKE16-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-TRUE16-LABEL: v_minimum_f16__nnan_src0:
@@ -356,20 +382,18 @@ define half @v_minimum_f16__nnan_src1(half %src0, half %arg1) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_add_f16_e32 v1, 1.0, v1
-; GFX8-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_f16__nnan_src1:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_add_f16_e32 v1, 1.0, v1
-; GFX900-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_f16__nnan_src1:
@@ -383,29 +407,29 @@ define half @v_minimum_f16__nnan_src1(half %src0, half %arg1) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_add_f16_e32 v1, 1.0, v1
-; GFX10-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: v_minimum_f16__nnan_src1:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.h, 1.0, v1.l
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v0.l, vcc_lo
 ; GFX11-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v0.h
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_minimum_f16__nnan_src1:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    v_add_f16_e32 v1, 1.0, v1
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-FAKE16-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-TRUE16-LABEL: v_minimum_f16__nnan_src1:
@@ -440,12 +464,13 @@ define void @s_minimum_f16(half inreg %src0, half inreg %src1) {
 ; GFX8-LABEL: s_minimum_f16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, s17
-; GFX8-NEXT:    v_min_f16_e32 v1, s16, v0
-; GFX8-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, s16, v0
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX8-NEXT:    v_mov_b32_e32 v0, s16
+; GFX8-NEXT:    v_mov_b32_e32 v1, s17
+; GFX8-NEXT:    v_cmp_u_f16_e64 vcc, s17, s17
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX8-NEXT:    ;;#ASMSTART
 ; GFX8-NEXT:    ; use v0
 ; GFX8-NEXT:    ;;#ASMEND
@@ -454,12 +479,13 @@ define void @s_minimum_f16(half inreg %src0, half inreg %src1) {
 ; GFX900-LABEL: s_minimum_f16:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_mov_b32_e32 v0, s17
-; GFX900-NEXT:    v_min_f16_e32 v1, s16, v0
-; GFX900-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, s16, v0
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX900-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX900-NEXT:    v_mov_b32_e32 v0, s16
+; GFX900-NEXT:    v_mov_b32_e32 v1, s17
+; GFX900-NEXT:    v_cmp_u_f16_e64 vcc, s17, s17
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; use v0
 ; GFX900-NEXT:    ;;#ASMEND
@@ -480,9 +506,12 @@ define void @s_minimum_f16(half inreg %src0, half inreg %src1) {
 ; GFX10-LABEL: s_minimum_f16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f16_e64 v0, s16, s17
-; GFX10-NEXT:    v_cmp_o_f16_e64 vcc_lo, s16, s17
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
+; GFX10-NEXT:    v_mov_b32_e32 v0, s17
+; GFX10-NEXT:    v_cmp_u_f16_e64 vcc_lo, s17, s17
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, s16, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, s17, v0, vcc_lo
+; GFX10-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ; use v0
@@ -492,10 +521,14 @@ define void @s_minimum_f16(half inreg %src0, half inreg %src1) {
 ; GFX11-TRUE16-LABEL: s_minimum_f16:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, s0, s1
-; GFX11-TRUE16-NEXT:    v_min_f16_e64 v0.l, s0, s1
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, s1, s1
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, s1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, s0, v0.l, s2
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, s1, v0.l, vcc_lo
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, s2
+; GFX11-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v0.h
 ; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX11-TRUE16-NEXT:    ;;#ASMSTART
 ; GFX11-TRUE16-NEXT:    ; use v0
@@ -505,10 +538,14 @@ define void @s_minimum_f16(half inreg %src0, half inreg %src1) {
 ; GFX11-FAKE16-LABEL: s_minimum_f16:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_min_f16_e64 v0, s0, s1
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s1
+; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, s1
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e64 vcc_lo, s1, s1
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, s0, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, s1, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX11-FAKE16-NEXT:    ;;#ASMSTART
 ; GFX11-FAKE16-NEXT:    ; use v0
@@ -543,28 +580,35 @@ define <2 x half> @v_minimum_v2f16(<2 x half> %src0, <2 x half> %src1) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
-; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v3, v2
-; GFX8-NEXT:    v_min_f16_e32 v2, v3, v2
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX8-NEXT:    v_cndmask_b32_sdwa v2, v3, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT:    v_min_f16_e32 v4, v0, v1
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v4, vcc
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_sdwa v3, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v2f16:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_pk_min_f16 v2, v0, v1
-; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX900-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_sdwa v0, v3, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX900-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_sdwa v3, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
 ; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT:    v_perm_b32 v1, v2, v1, s4
+; GFX900-NEXT:    v_perm_b32 v0, v3, v0, s4
+; GFX900-NEXT:    v_pk_min_f16 v0, v0, v1
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v2f16:
@@ -576,40 +620,56 @@ define <2 x half> @v_minimum_v2f16(<2 x half> %src0, <2 x half> %src1) {
 ; GFX10-LABEL: v_minimum_v2f16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_pk_min_f16 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX10-NEXT:    v_cmp_o_f16_e64 s4, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_sdwa v1, v3, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0x7e00, v2, s4
-; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, v1, v1
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v0, v1, s4
+; GFX10-NEXT:    v_cndmask_b32_sdwa v0, v0, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_perm_b32 v0, v0, v3, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10-NEXT:    v_pk_min_f16 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: v_minimum_v2f16:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
-; GFX11-TRUE16-NEXT:    v_pk_min_f16 v2, v0, v1
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v0.h, v1.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.h, v1.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v1.h, vcc_lo
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v2.l, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v2.h, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.l, s0
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v1.h, v0.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, v0.l, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v0, v0, v1
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_minimum_v2f16:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_pk_min_f16 v2, v0, v1
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v3
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v5, vcc_lo
-; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v3, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v0, v0, v1
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v2f16:
@@ -670,28 +730,35 @@ define <2 x half> @v_minimum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
-; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v3, v2
-; GFX8-NEXT:    v_min_f16_e32 v2, v3, v2
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX8-NEXT:    v_cndmask_b32_sdwa v2, v3, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT:    v_min_f16_e32 v4, v0, v1
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v4, vcc
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_sdwa v3, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v2f16__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_pk_min_f16 v2, v0, v1
-; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX900-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_sdwa v0, v3, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX900-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_sdwa v3, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
 ; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT:    v_perm_b32 v1, v2, v1, s4
+; GFX900-NEXT:    v_perm_b32 v0, v3, v0, s4
+; GFX900-NEXT:    v_pk_min_f16 v0, v0, v1
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v2f16__nsz:
@@ -703,40 +770,56 @@ define <2 x half> @v_minimum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
 ; GFX10-LABEL: v_minimum_v2f16__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_pk_min_f16 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX10-NEXT:    v_cmp_o_f16_e64 s4, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_sdwa v1, v3, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0x7e00, v2, s4
-; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, v1, v1
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v0, v1, s4
+; GFX10-NEXT:    v_cndmask_b32_sdwa v0, v0, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_perm_b32 v0, v0, v3, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10-NEXT:    v_pk_min_f16 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: v_minimum_v2f16__nsz:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
-; GFX11-TRUE16-NEXT:    v_pk_min_f16 v2, v0, v1
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v0.h, v1.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.h, v1.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v1.h, vcc_lo
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v2.l, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v2.h, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.l, s0
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v1.h, v0.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, v0.l, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v0, v0, v1
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_minimum_v2f16__nsz:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_pk_min_f16 v2, v0, v1
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v3
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v5, vcc_lo
-; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v3, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v0, v0, v1
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v2f16__nsz:
@@ -796,18 +879,23 @@ define void @s_minimum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX8-LABEL: s_minimum_v2f16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_lshr_b32 s4, s17, 16
-; GFX8-NEXT:    s_lshr_b32 s5, s16, 16
+; GFX8-NEXT:    s_lshr_b32 s4, s16, 16
+; GFX8-NEXT:    s_lshr_b32 s5, s17, 16
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s4
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, s5, v0
-; GFX8-NEXT:    v_min_f16_e32 v0, s5, v0
-; GFX8-NEXT:    v_mov_b32_e32 v1, 0x7e00
+; GFX8-NEXT:    v_mov_b32_e32 v1, s5
+; GFX8-NEXT:    v_cmp_u_f16_e64 vcc, s5, s5
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_mov_b32_e32 v1, s16
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s17
-; GFX8-NEXT:    v_cndmask_b32_sdwa v0, v1, v0, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT:    v_min_f16_e32 v3, s16, v2
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, s16, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_cmp_u_f16_e64 vcc, s17, s17
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v1, vcc
+; GFX8-NEXT:    v_min_f16_e32 v1, v1, v2
+; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    ;;#ASMSTART
 ; GFX8-NEXT:    ; use v0
 ; GFX8-NEXT:    ;;#ASMEND
@@ -816,19 +904,25 @@ define void @s_minimum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX900-LABEL: s_minimum_v2f16:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_mov_b32_e32 v0, s17
-; GFX900-NEXT:    v_mov_b32_e32 v1, s17
-; GFX900-NEXT:    s_lshr_b32 s4, s17, 16
-; GFX900-NEXT:    v_pk_min_f16 v1, s16, v1
-; GFX900-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, s16, v0
-; GFX900-NEXT:    s_lshr_b32 s5, s16, 16
-; GFX900-NEXT:    v_mov_b32_e32 v3, s4
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, s5, v3
-; GFX900-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX900-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX900-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX900-NEXT:    s_lshr_b32 s4, s16, 16
+; GFX900-NEXT:    s_lshr_b32 s5, s17, 16
+; GFX900-NEXT:    v_mov_b32_e32 v0, s4
+; GFX900-NEXT:    v_mov_b32_e32 v1, s5
+; GFX900-NEXT:    v_cmp_u_f16_e64 vcc, s5, s5
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_mov_b32_e32 v2, s16
+; GFX900-NEXT:    v_mov_b32_e32 v3, s17
+; GFX900-NEXT:    v_cmp_u_f16_e64 vcc, s17, s17
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v2, vcc
+; GFX900-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX900-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX900-NEXT:    v_lshl_or_b32 v1, v1, 16, v3
+; GFX900-NEXT:    v_lshl_or_b32 v0, v0, 16, v2
+; GFX900-NEXT:    v_pk_min_f16 v0, v0, v1
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; use v0
 ; GFX900-NEXT:    ;;#ASMEND
@@ -848,16 +942,23 @@ define void @s_minimum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX10-LABEL: s_minimum_v2f16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_mov_b32_e32 v0, s17
+; GFX10-NEXT:    v_cmp_u_f16_e64 vcc_lo, s17, s17
 ; GFX10-NEXT:    s_lshr_b32 s4, s17, 16
 ; GFX10-NEXT:    s_lshr_b32 s5, s16, 16
-; GFX10-NEXT:    v_pk_min_f16 v0, s16, s17
-; GFX10-NEXT:    v_cmp_o_f16_e64 vcc_lo, s5, s4
-; GFX10-NEXT:    v_cmp_o_f16_e64 s4, s16, s17
-; GFX10-NEXT:    v_mov_b32_e32 v1, 0x7e00
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0x7e00, v0, s4
-; GFX10-NEXT:    v_cndmask_b32_sdwa v0, v1, v0, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v2
-; GFX10-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
+; GFX10-NEXT:    v_mov_b32_e32 v1, s4
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, s16, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e64 vcc_lo, s4, s4
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, s5, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, s17, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, s4, v1, vcc_lo
+; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX10-NEXT:    v_pk_min_f16 v0, v0, v2
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ; use v0
 ; GFX10-NEXT:    ;;#ASMEND
@@ -866,15 +967,23 @@ define void @s_minimum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX11-TRUE16-LABEL: s_minimum_v2f16:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_pk_min_f16 v0, s0, s1
 ; GFX11-TRUE16-NEXT:    s_lshr_b32 s2, s1, 16
-; GFX11-TRUE16-NEXT:    s_lshr_b32 s3, s0, 16
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, s0, s1
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, s3, s2
-; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, s0
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v1.l, s1
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s4, s1, s1
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s3, s2, s2
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, s2
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, s1
+; GFX11-TRUE16-NEXT:    s_lshr_b32 s5, s0, 16
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, s5, v0.l, s3
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, s0, v0.h, s4
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.h, v1.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, s2, v1.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, s1, v1.l, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v0, v1, v0
 ; GFX11-TRUE16-NEXT:    ;;#ASMSTART
 ; GFX11-TRUE16-NEXT:    ; use v0
 ; GFX11-TRUE16-NEXT:    ;;#ASMEND
@@ -883,18 +992,26 @@ define void @s_minimum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX11-FAKE16-LABEL: s_minimum_v2f16:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_pk_min_f16 v0, s0, s1
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s1
 ; GFX11-FAKE16-NEXT:    s_lshr_b32 s2, s1, 16
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s2
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e64 vcc_lo, s1, s1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, s0, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e64 vcc_lo, s2, s2
 ; GFX11-FAKE16-NEXT:    s_lshr_b32 s0, s0, 16
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s2
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, s1, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
 ; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, s2, v1, vcc_lo
 ; GFX11-FAKE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v0, v0, v2
 ; GFX11-FAKE16-NEXT:    ;;#ASMSTART
 ; GFX11-FAKE16-NEXT:    ; use v0
 ; GFX11-FAKE16-NEXT:    ;;#ASMEND
@@ -923,34 +1040,53 @@ define <3 x half> @v_minimum_v3f16(<3 x half> %src0, <3 x half> %src1) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v5, v4
-; GFX8-NEXT:    v_min_f16_e32 v4, v5, v4
-; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX8-NEXT:    v_cndmask_b32_sdwa v4, v5, v4, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT:    v_min_f16_e32 v6, v1, v3
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX8-NEXT:    v_min_f16_e32 v3, v0, v2
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_sdwa v5, v0, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_min_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX8-NEXT:    v_min_f16_e32 v1, v1, v3
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v3f16:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX900-NEXT:    v_pk_min_f16 v3, v0, v2
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX900-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_sdwa v0, v5, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_sdwa v5, v0, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
 ; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT:    v_perm_b32 v2, v4, v2, s4
+; GFX900-NEXT:    v_perm_b32 v0, v5, v0, s4
+; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX900-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX900-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX900-NEXT:    v_perm_b32 v3, v4, v3, s4
+; GFX900-NEXT:    v_perm_b32 v1, v2, v1, s4
+; GFX900-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v3f16:
@@ -963,51 +1099,94 @@ define <3 x half> @v_minimum_v3f16(<3 x half> %src0, <3 x half> %src1) {
 ; GFX10-LABEL: v_minimum_v3f16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_pk_min_f16 v4, v0, v2
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX10-NEXT:    v_cmp_o_f16_e64 s4, v0, v2
-; GFX10-NEXT:    v_cndmask_b32_sdwa v2, v5, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0x7e00, v4, s4
-; GFX10-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX10-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, v3, v3
+; GFX10-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s4
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, v5, v5
+; GFX10-NEXT:    v_cndmask_b32_sdwa v0, v0, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s4
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_perm_b32 v1, v6, v1, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_perm_b32 v0, v0, v5, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v2, v4, v2, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v6, vcc_lo
+; GFX10-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX10-NEXT:    v_perm_b32 v3, v7, v3, 0x5040100
+; GFX10-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: v_minimum_v3f16:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1.l, v3.l
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v0.l, v2.l
-; GFX11-TRUE16-NEXT:    v_pk_min_f16 v4, v0, v2
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v0.h, v2.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2.h, v2.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v2.l, v2.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v3.h, v3.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v3.l, v3.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v2.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v1.h, v3.h, s1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, v3.l, s2
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v1.h, v1.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, v2.h, v0.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, v2.l, v0.l, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, v3.h, v1.h, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, v3.l, v1.l, s2
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v0, v0, v2
 ; GFX11-TRUE16-NEXT:    v_pk_min_f16 v1, v1, v3
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v4.l, s0
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v4.h, s1
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v1.l, vcc_lo
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_minimum_v3f16:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_pk_min_f16 v4, v0, v2
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
-; GFX11-FAKE16-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v5, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, v7, v6, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v4, v2, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v6, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v7, v3, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v3f16:
@@ -1074,34 +1253,53 @@ define <3 x half> @v_minimum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v5, v4
-; GFX8-NEXT:    v_min_f16_e32 v4, v5, v4
-; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX8-NEXT:    v_cndmask_b32_sdwa v4, v5, v4, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT:    v_min_f16_e32 v6, v1, v3
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX8-NEXT:    v_min_f16_e32 v3, v0, v2
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_sdwa v5, v0, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_min_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX8-NEXT:    v_min_f16_e32 v1, v1, v3
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v3f16__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX900-NEXT:    v_pk_min_f16 v3, v0, v2
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX900-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_sdwa v0, v5, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_sdwa v5, v0, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
 ; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT:    v_perm_b32 v2, v4, v2, s4
+; GFX900-NEXT:    v_perm_b32 v0, v5, v0, s4
+; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX900-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX900-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX900-NEXT:    v_perm_b32 v3, v4, v3, s4
+; GFX900-NEXT:    v_perm_b32 v1, v2, v1, s4
+; GFX900-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v3f16__nsz:
@@ -1114,51 +1312,94 @@ define <3 x half> @v_minimum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) {
 ; GFX10-LABEL: v_minimum_v3f16__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_pk_min_f16 v4, v0, v2
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX10-NEXT:    v_cmp_o_f16_e64 s4, v0, v2
-; GFX10-NEXT:    v_cndmask_b32_sdwa v2, v5, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0x7e00, v4, s4
-; GFX10-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX10-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, v3, v3
+; GFX10-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s4
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, v5, v5
+; GFX10-NEXT:    v_cndmask_b32_sdwa v0, v0, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s4
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_perm_b32 v1, v6, v1, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_perm_b32 v0, v0, v5, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v2, v4, v2, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v6, vcc_lo
+; GFX10-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX10-NEXT:    v_perm_b32 v3, v7, v3, 0x5040100
+; GFX10-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: v_minimum_v3f16__nsz:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1.l, v3.l
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v0.l, v2.l
-; GFX11-TRUE16-NEXT:    v_pk_min_f16 v4, v0, v2
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v0.h, v2.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2.h, v2.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v2.l, v2.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v3.h, v3.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v3.l, v3.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v2.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v1.h, v3.h, s1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, v3.l, s2
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v1.h, v1.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, v2.h, v0.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, v2.l, v0.l, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, v3.h, v1.h, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, v3.l, v1.l, s2
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v0, v0, v2
 ; GFX11-TRUE16-NEXT:    v_pk_min_f16 v1, v1, v3
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v4.l, s0
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v4.h, s1
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v1.l, vcc_lo
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_minimum_v3f16__nsz:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_pk_min_f16 v4, v0, v2
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
-; GFX11-FAKE16-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v5, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, v7, v6, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v4, v2, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v6, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v7, v3, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v3f16__nsz:
@@ -1225,44 +1466,61 @@ define <4 x half> @v_minimum_v4f16(<4 x half> %src0, <4 x half> %src1) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
-; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v5, v4
-; GFX8-NEXT:    v_min_f16_e32 v4, v5, v4
-; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7e00
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_sdwa v5, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 16, v0
-; GFX8-NEXT:    v_cndmask_b32_sdwa v4, v5, v4, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT:    v_min_f16_e32 v8, v7, v6
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v7, v6
-; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
-; GFX8-NEXT:    v_min_f16_e32 v7, v1, v3
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
-; GFX8-NEXT:    v_min_f16_e32 v3, v0, v2
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v6
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_min_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_min_f16_sdwa v5, v5, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_min_f16_e32 v1, v1, v3
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v5
+; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v4f16:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX900-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_sdwa v1, v5, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX900-NEXT:    v_pk_min_f16 v3, v0, v2
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX900-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_sdwa v0, v5, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_sdwa v5, v0, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
 ; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
-; GFX900-NEXT:    v_perm_b32 v1, v1, v6, s4
+; GFX900-NEXT:    v_perm_b32 v2, v4, v2, s4
+; GFX900-NEXT:    v_perm_b32 v0, v5, v0, s4
+; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX900-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX900-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX900-NEXT:    v_perm_b32 v3, v4, v3, s4
+; GFX900-NEXT:    v_perm_b32 v1, v2, v1, s4
+; GFX900-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v4f16:
@@ -1275,62 +1533,94 @@ define <4 x half> @v_minimum_v4f16(<4 x half> %src0, <4 x half> %src1) {
 ; GFX10-LABEL: v_minimum_v4f16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_pk_min_f16 v6, v1, v3
-; GFX10-NEXT:    v_cmp_o_f16_sdwa s4, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_pk_min_f16 v5, v0, v2
-; GFX10-NEXT:    v_cmp_o_f16_e64 s5, v0, v2
-; GFX10-NEXT:    v_cndmask_b32_sdwa v2, v4, v6, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    s_mov_b32 vcc_lo, s4
-; GFX10-NEXT:    v_cndmask_b32_sdwa v4, v4, v5, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0x7e00, v5, s5
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v6, vcc_lo
-; GFX10-NEXT:    v_perm_b32 v0, v4, v0, 0x5040100
-; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, v3, v3
+; GFX10-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s4
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, v5, v5
+; GFX10-NEXT:    v_cndmask_b32_sdwa v0, v0, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s4
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_perm_b32 v1, v6, v1, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_perm_b32 v0, v0, v5, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v2, v4, v2, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v6, vcc_lo
+; GFX10-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX10-NEXT:    v_perm_b32 v3, v7, v3, 0x5040100
+; GFX10-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: v_minimum_v4f16:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1.l, v3.l
-; GFX11-TRUE16-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v0.l, v2.l
-; GFX11-TRUE16-NEXT:    v_pk_min_f16 v5, v0, v2
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v0.h, v2.h
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, v1.h, v3.h
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v4.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2.h, v2.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v2.l, v2.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v3.h, v3.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v3.l, v3.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v2.h, vcc_lo
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v5.l, s0
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v5.h, s1
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x7e00, v4.h, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v2.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v1.h, v3.h, s1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, v3.l, s2
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v1.h, v1.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, v2.h, v0.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, v2.l, v0.l, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, v3.h, v1.h, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, v3.l, v1.l, s2
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_minimum_v4f16:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
-; GFX11-FAKE16-NEXT:    v_pk_min_f16 v7, v0, v2
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v8
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v9, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
-; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v5, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, v7, v6, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v4, v2, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v6, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v7, v3, 0x5040100
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v4f16:
@@ -1399,44 +1689,61 @@ define <4 x half> @v_minimum_v4f16__nsz(<4 x half> %src0, <4 x half> %src1) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
-; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v5, v4
-; GFX8-NEXT:    v_min_f16_e32 v4, v5, v4
-; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7e00
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_sdwa v5, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 16, v0
-; GFX8-NEXT:    v_cndmask_b32_sdwa v4, v5, v4, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT:    v_min_f16_e32 v8, v7, v6
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v7, v6
-; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
-; GFX8-NEXT:    v_min_f16_e32 v7, v1, v3
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
-; GFX8-NEXT:    v_min_f16_e32 v3, v0, v2
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v6
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_min_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_min_f16_sdwa v5, v5, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_min_f16_e32 v1, v1, v3
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v5
+; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v4f16__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX900-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_sdwa v1, v5, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX900-NEXT:    v_pk_min_f16 v3, v0, v2
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX900-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_sdwa v0, v5, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_sdwa v5, v0, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
 ; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
-; GFX900-NEXT:    v_perm_b32 v1, v1, v6, s4
+; GFX900-NEXT:    v_perm_b32 v2, v4, v2, s4
+; GFX900-NEXT:    v_perm_b32 v0, v5, v0, s4
+; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX900-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX900-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX900-NEXT:    v_perm_b32 v3, v4, v3, s4
+; GFX900-NEXT:    v_perm_b32 v1, v2, v1, s4
+; GFX900-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v4f16__nsz:
@@ -1449,62 +1756,94 @@ define <4 x half> @v_minimum_v4f16__nsz(<4 x half> %src0, <4 x half> %src1) {
 ; GFX10-LABEL: v_minimum_v4f16__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_pk_min_f16 v6, v1, v3
-; GFX10-NEXT:    v_cmp_o_f16_sdwa s4, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_pk_min_f16 v5, v0, v2
-; GFX10-NEXT:    v_cmp_o_f16_e64 s5, v0, v2
-; GFX10-NEXT:    v_cndmask_b32_sdwa v2, v4, v6, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    s_mov_b32 vcc_lo, s4
-; GFX10-NEXT:    v_cndmask_b32_sdwa v4, v4, v5, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0x7e00, v5, s5
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v6, vcc_lo
-; GFX10-NEXT:    v_perm_b32 v0, v4, v0, 0x5040100
-; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, v3, v3
+; GFX10-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s4
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, v5, v5
+; GFX10-NEXT:    v_cndmask_b32_sdwa v0, v0, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s4
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_perm_b32 v1, v6, v1, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_perm_b32 v0, v0, v5, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v2, v4, v2, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v6, vcc_lo
+; GFX10-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX10-NEXT:    v_perm_b32 v3, v7, v3, 0x5040100
+; GFX10-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: v_minimum_v4f16__nsz:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1.l, v3.l
-; GFX11-TRUE16-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v0.l, v2.l
-; GFX11-TRUE16-NEXT:    v_pk_min_f16 v5, v0, v2
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v0.h, v2.h
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, v1.h, v3.h
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v4.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2.h, v2.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v2.l, v2.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v3.h, v3.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v3.l, v3.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v2.h, vcc_lo
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v5.l, s0
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v5.h, s1
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x7e00, v4.h, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v2.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v1.h, v3.h, s1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, v3.l, s2
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v1.h, v1.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, v2.h, v0.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, v2.l, v0.l, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, v3.h, v1.h, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, v3.l, v1.l, s2
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_minimum_v4f16__nsz:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
-; GFX11-FAKE16-NEXT:    v_pk_min_f16 v7, v0, v2
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v8
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v9, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
-; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v5, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, v7, v6, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v4, v2, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v6, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v7, v3, 0x5040100
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v4f16__nsz:
@@ -1572,82 +1911,116 @@ define <8 x half> @v_minimum_v8f16(<8 x half> %src0, <8 x half> %src1) {
 ; GFX8-LABEL: v_minimum_v8f16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 16, v7
-; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 16, v3
-; GFX8-NEXT:    v_min_f16_e32 v10, v9, v8
-; GFX8-NEXT:    v_mov_b32_e32 v11, 0x7e00
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v9, v8
-; GFX8-NEXT:    v_cndmask_b32_e32 v8, v11, v10, vcc
-; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 16, v6
-; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
-; GFX8-NEXT:    v_min_f16_e32 v12, v10, v9
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v10, v9
-; GFX8-NEXT:    v_cndmask_b32_e32 v9, v11, v12, vcc
-; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 16, v5
-; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 16, v1
-; GFX8-NEXT:    v_min_f16_e32 v13, v12, v10
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v12, v10
-; GFX8-NEXT:    v_cndmask_b32_e32 v10, v11, v13, vcc
+; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
+; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 16, v3
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX8-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc
+; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 16, v6
+; GFX8-NEXT:    v_min_f16_sdwa v8, v8, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 16, v2
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v10, v10
+; GFX8-NEXT:    v_cndmask_b32_e32 v9, v9, v10, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
+; GFX8-NEXT:    v_cndmask_b32_e32 v10, v10, v9, vcc
+; GFX8-NEXT:    v_lshrrev_b32_e32 v11, 16, v5
+; GFX8-NEXT:    v_min_f16_sdwa v9, v9, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 16, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v11, v11
+; GFX8-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v10, v10
+; GFX8-NEXT:    v_cndmask_b32_e32 v11, v11, v10, vcc
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 16, v4
-; GFX8-NEXT:    v_lshrrev_b32_e32 v13, 16, v0
-; GFX8-NEXT:    v_min_f16_e32 v14, v13, v12
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v13, v12
-; GFX8-NEXT:    v_cndmask_b32_e32 v12, v11, v14, vcc
-; GFX8-NEXT:    v_min_f16_e32 v13, v3, v7
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v3, v7
-; GFX8-NEXT:    v_cndmask_b32_e32 v3, v11, v13, vcc
-; GFX8-NEXT:    v_min_f16_e32 v7, v2, v6
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v2, v6
-; GFX8-NEXT:    v_cndmask_b32_e32 v2, v11, v7, vcc
-; GFX8-NEXT:    v_min_f16_e32 v6, v1, v5
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v1, v5
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v11, v6, vcc
-; GFX8-NEXT:    v_min_f16_e32 v5, v0, v4
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v4
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v11, v5, vcc
-; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 16, v12
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 16, v10
-; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 16, v9
-; GFX8-NEXT:    v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 16, v8
-; GFX8-NEXT:    v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_min_f16_sdwa v10, v10, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_lshrrev_b32_e32 v11, 16, v0
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v12, v12
+; GFX8-NEXT:    v_cndmask_b32_e32 v11, v11, v12, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v11, v11
+; GFX8-NEXT:    v_cndmask_b32_e32 v12, v12, v11, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX8-NEXT:    v_min_f16_sdwa v11, v11, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_min_f16_e32 v3, v3, v7
+; GFX8-NEXT:    v_min_f16_e32 v2, v2, v6
+; GFX8-NEXT:    v_min_f16_e32 v1, v1, v5
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v4
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v11
+; GFX8-NEXT:    v_or_b32_e32 v1, v1, v10
+; GFX8-NEXT:    v_or_b32_e32 v2, v2, v9
+; GFX8-NEXT:    v_or_b32_e32 v3, v3, v8
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v8f16:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_pk_min_f16 v8, v3, v7
-; GFX900-NEXT:    v_mov_b32_e32 v9, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v3, v7
-; GFX900-NEXT:    v_cndmask_b32_e32 v10, v9, v8, vcc
-; GFX900-NEXT:    v_lshrrev_b32_e32 v8, 16, v8
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_e32 v3, v9, v8, vcc
-; GFX900-NEXT:    v_pk_min_f16 v7, v2, v6
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v2, v6
-; GFX900-NEXT:    v_cndmask_b32_e32 v8, v9, v7, vcc
-; GFX900-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_e32 v2, v9, v7, vcc
-; GFX900-NEXT:    v_pk_min_f16 v6, v1, v5
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v1, v5
-; GFX900-NEXT:    v_cndmask_b32_e32 v7, v9, v6, vcc
-; GFX900-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v9, v6, vcc
-; GFX900-NEXT:    v_pk_min_f16 v5, v0, v4
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v4
-; GFX900-NEXT:    v_cndmask_b32_e32 v6, v9, v5, vcc
-; GFX900-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v9, v5, vcc
+; GFX900-NEXT:    v_lshrrev_b32_e32 v9, 16, v4
+; GFX900-NEXT:    v_lshrrev_b32_e32 v8, 16, v0
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
 ; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX900-NEXT:    v_perm_b32 v0, v0, v6, s4
-; GFX900-NEXT:    v_perm_b32 v1, v1, v7, s4
-; GFX900-NEXT:    v_perm_b32 v2, v2, v8, s4
-; GFX900-NEXT:    v_perm_b32 v3, v3, v10, s4
+; GFX900-NEXT:    v_perm_b32 v4, v9, v4, s4
+; GFX900-NEXT:    v_perm_b32 v0, v8, v0, s4
+; GFX900-NEXT:    v_lshrrev_b32_e32 v8, 16, v5
+; GFX900-NEXT:    v_pk_min_f16 v0, v0, v4
+; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
+; GFX900-NEXT:    v_perm_b32 v5, v8, v5, s4
+; GFX900-NEXT:    v_perm_b32 v1, v4, v1, s4
+; GFX900-NEXT:    v_pk_min_f16 v1, v1, v5
+; GFX900-NEXT:    v_lshrrev_b32_e32 v5, 16, v6
+; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v6, v6, v2, vcc
+; GFX900-NEXT:    v_perm_b32 v5, v5, v6, s4
+; GFX900-NEXT:    v_perm_b32 v2, v4, v2, s4
+; GFX900-NEXT:    v_pk_min_f16 v2, v2, v5
+; GFX900-NEXT:    v_lshrrev_b32_e32 v5, 16, v7
+; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v6, v7, v3, vcc
+; GFX900-NEXT:    v_perm_b32 v5, v5, v6, s4
+; GFX900-NEXT:    v_perm_b32 v3, v4, v3, s4
+; GFX900-NEXT:    v_pk_min_f16 v3, v3, v5
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v8f16:
@@ -1662,102 +2035,170 @@ define <8 x half> @v_minimum_v8f16(<8 x half> %src0, <8 x half> %src1) {
 ; GFX10-LABEL: v_minimum_v8f16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_pk_min_f16 v8, v3, v7
-; GFX10-NEXT:    v_mov_b32_e32 v9, 0x7e00
-; GFX10-NEXT:    v_pk_min_f16 v10, v2, v6
-; GFX10-NEXT:    v_cmp_o_f16_sdwa s4, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_cmp_o_f16_sdwa s5, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_pk_min_f16 v12, v0, v4
-; GFX10-NEXT:    v_cndmask_b32_sdwa v11, v9, v8, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v6
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v10, vcc_lo
-; GFX10-NEXT:    s_mov_b32 vcc_lo, s4
-; GFX10-NEXT:    v_cmp_o_f16_sdwa s4, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_sdwa v6, v9, v10, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_pk_min_f16 v10, v1, v5
-; GFX10-NEXT:    s_mov_b32 vcc_lo, s5
-; GFX10-NEXT:    v_perm_b32 v2, v6, v2, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_sdwa v13, v9, v10, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v4
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v12, vcc_lo
-; GFX10-NEXT:    s_mov_b32 vcc_lo, s4
-; GFX10-NEXT:    v_cndmask_b32_sdwa v4, v9, v12, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v5
-; GFX10-NEXT:    v_perm_b32 v0, v4, v0, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v10, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
-; GFX10-NEXT:    v_perm_b32 v1, v13, v1, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v8, vcc_lo
-; GFX10-NEXT:    v_perm_b32 v3, v11, v3, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
+; GFX10-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
+; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 16, v5
+; GFX10-NEXT:    v_lshrrev_b32_e32 v11, 16, v1
+; GFX10-NEXT:    v_lshrrev_b32_e32 v13, 16, v7
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8, v8
+; GFX10-NEXT:    v_lshrrev_b32_e32 v12, 16, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX10-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v10, v10
+; GFX10-NEXT:    v_perm_b32 v0, v9, v0, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v9, 16, v2
+; GFX10-NEXT:    v_perm_b32 v4, v8, v4, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v8, v11, v10, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_lshrrev_b32_e32 v11, 16, v6
+; GFX10-NEXT:    v_pk_min_f16 v0, v0, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8, v8
+; GFX10-NEXT:    v_cndmask_b32_e32 v10, v10, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v11, v11
+; GFX10-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX10-NEXT:    v_perm_b32 v1, v8, v1, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v5, v10, v5, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v11, v11, v9, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_pk_min_f16 v1, v1, v5
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v13, v13
+; GFX10-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v12, v12
+; GFX10-NEXT:    v_perm_b32 v2, v9, v2, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v6, v11, v6, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v13, v13, v12, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_pk_min_f16 v2, v2, v6
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v3, vcc_lo
+; GFX10-NEXT:    v_perm_b32 v3, v12, v3, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v7, v13, v7, 0x5040100
+; GFX10-NEXT:    v_pk_min_f16 v3, v3, v7
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: v_minimum_v8f16:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3.l, v7.l
-; GFX11-TRUE16-NEXT:    v_pk_min_f16 v8, v3, v7
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v2.l, v6.l
-; GFX11-TRUE16-NEXT:    v_pk_min_f16 v9, v2, v6
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v0.l, v4.l
-; GFX11-TRUE16-NEXT:    v_pk_min_f16 v10, v0, v4
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, 0x7e00, v8.l, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3.h, v7.h
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, 0x7e00, v9.l, s0
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v1.l, v5.l
-; GFX11-TRUE16-NEXT:    v_pk_min_f16 v7, v1, v5
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, v0.h, v4.h
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s3, v1.h, v5.h
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s4, v2.h, v6.h
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v10.l, s1
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v7.l, s0
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v10.h, s2
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x7e00, v7.h, s3
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, 0x7e00, v9.h, s4
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, 0x7e00, v8.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4.h, v4.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v4.l, v4.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v5.h, v5.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v5.l, v5.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s3, v7.l, v7.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v4.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v4.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v1.h, v5.h, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, v5.l, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, v3.l, v7.l, s3
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v1.h, v1.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s3, v3.l, v3.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.h, v4.h, v0.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.l, v4.l, v0.l, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.h, v5.h, v1.h, s1
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6.h, v6.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v6.l, v6.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v7.h, v7.h
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v7.l, v7.l, v3.l, s3
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, v2.h, v6.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, v2.l, v6.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, v3.h, v7.h, s1
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v2.h, v2.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v2.l, v2.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v3.h, v3.h
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.l, v5.l, v1.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v6.h, v6.h, v2.h, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v6.l, v6.l, v2.l, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v7.h, v7.h, v3.h, s2
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v1, v1, v5
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v2, v2, v6
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v3, v3, v7
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_minimum_v8f16:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_pk_min_f16 v8, v3, v7
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
-; GFX11-FAKE16-NEXT:    v_pk_min_f16 v10, v2, v6
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v5
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v13, 16, v7
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8, v8
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v12, 16, v3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v10, v10
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v4, v8, v4, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v8, v11, v10, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v6
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v12, 16, v2
-; GFX11-FAKE16-NEXT:    v_pk_min_f16 v14, v1, v5
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v9, 0x7e00, v8, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v6
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v13, 16, v10
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v8
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v10, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v12, v11
-; GFX11-FAKE16-NEXT:    v_pk_min_f16 v11, v0, v4
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v12, 16, v4
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v13, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v5
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v13, 16, v0
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v15, 16, v11
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v10, 0x7e00, v14, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v4
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v14, 16, v14
-; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v6, v2, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v11, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v13, v12
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v15, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v5
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v4, v0, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v14, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
-; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v1, v10, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v8, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8, v8
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v9, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v10, v10, v8, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v11, v11
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v8, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v11, v11, v9, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX11-FAKE16-NEXT:    v_perm_b32 v5, v10, v5, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v13, v13
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v1, v1, v5
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, v6, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v12, v12
+; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v9, v2, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v13, v13, v12, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX11-FAKE16-NEXT:    v_perm_b32 v6, v11, v6, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, v7, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v12, v3, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v2, v2, v6
+; GFX11-FAKE16-NEXT:    v_perm_b32 v7, v13, v7, 0x5040100
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v3, v9, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v3, v3, v7
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v8f16:
@@ -1780,150 +2221,220 @@ define <16 x half> @v_minimum_v16f16(<16 x half> %src0, <16 x half> %src1) {
 ; GFX8-LABEL: v_minimum_v16f16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_lshrrev_b32_e32 v16, 16, v15
+; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v7
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v16, v16
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v17, v16, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v17, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v16, v16, v17, vcc
+; GFX8-NEXT:    v_min_f16_sdwa v16, v17, v16 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v14
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v6
-; GFX8-NEXT:    v_min_f16_e32 v16, v18, v17
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v18, v17
-; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v13
-; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v5
-; GFX8-NEXT:    v_min_f16_e32 v20, v18, v17
-; GFX8-NEXT:    v_cmp_o_f16_e64 s[4:5], v18, v17
-; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v12
-; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v4
-; GFX8-NEXT:    v_min_f16_e32 v21, v18, v17
-; GFX8-NEXT:    v_cmp_o_f16_e64 s[6:7], v18, v17
-; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v11
-; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v3
-; GFX8-NEXT:    v_min_f16_e32 v22, v18, v17
-; GFX8-NEXT:    v_cmp_o_f16_e64 s[8:9], v18, v17
-; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v10
-; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v2
-; GFX8-NEXT:    v_min_f16_e32 v23, v18, v17
-; GFX8-NEXT:    v_cmp_o_f16_e64 s[10:11], v18, v17
-; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v9
-; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v1
-; GFX8-NEXT:    v_mov_b32_e32 v19, 0x7e00
-; GFX8-NEXT:    v_min_f16_e32 v24, v18, v17
-; GFX8-NEXT:    v_cmp_o_f16_e64 s[12:13], v18, v17
-; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v8
-; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v0
-; GFX8-NEXT:    v_min_f16_e32 v25, v18, v17
-; GFX8-NEXT:    v_cmp_o_f16_e64 s[14:15], v18, v17
-; GFX8-NEXT:    v_min_f16_e32 v17, v6, v14
-; GFX8-NEXT:    v_cmp_o_f16_e64 s[16:17], v6, v14
-; GFX8-NEXT:    v_min_f16_e32 v6, v5, v13
-; GFX8-NEXT:    v_cmp_o_f16_e64 s[18:19], v5, v13
-; GFX8-NEXT:    v_min_f16_e32 v5, v4, v12
-; GFX8-NEXT:    v_cmp_o_f16_e64 s[20:21], v4, v12
-; GFX8-NEXT:    v_min_f16_e32 v4, v3, v11
-; GFX8-NEXT:    v_cmp_o_f16_e64 s[22:23], v3, v11
-; GFX8-NEXT:    v_min_f16_e32 v3, v2, v10
-; GFX8-NEXT:    v_min_f16_e32 v11, v7, v15
-; GFX8-NEXT:    v_cmp_o_f16_e64 s[24:25], v7, v15
-; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 16, v15
-; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
-; GFX8-NEXT:    v_cndmask_b32_e32 v14, v19, v16, vcc
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v2, v10
-; GFX8-NEXT:    v_min_f16_e32 v13, v7, v12
-; GFX8-NEXT:    v_cmp_o_f16_e64 s[26:27], v7, v12
-; GFX8-NEXT:    v_min_f16_e32 v7, v1, v9
-; GFX8-NEXT:    v_cndmask_b32_e32 v2, v19, v3, vcc
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v1, v9
-; GFX8-NEXT:    v_min_f16_e32 v12, v0, v8
-; GFX8-NEXT:    v_cndmask_b32_e64 v18, v19, v22, s[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e64 v22, v19, v25, s[14:15]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v19, v7, vcc
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v8
-; GFX8-NEXT:    v_cndmask_b32_e64 v16, v19, v21, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v21, v19, v24, s[12:13]
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v19, v12, vcc
-; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v22
-; GFX8-NEXT:    v_cndmask_b32_e64 v15, v19, v20, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v20, v19, v23, s[10:11]
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v21
-; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v20
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v19, v4, s[22:23]
-; GFX8-NEXT:    v_or_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v18
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v19, v5, s[20:21]
-; GFX8-NEXT:    v_or_b32_sdwa v3, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 16, v16
-; GFX8-NEXT:    v_cndmask_b32_e64 v13, v19, v13, s[26:27]
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, v19, v6, s[18:19]
-; GFX8-NEXT:    v_or_b32_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_lshlrev_b32_e32 v5, 16, v15
-; GFX8-NEXT:    v_cndmask_b32_e64 v11, v19, v11, s[24:25]
-; GFX8-NEXT:    v_cndmask_b32_e64 v17, v19, v17, s[16:17]
-; GFX8-NEXT:    v_or_b32_sdwa v5, v6, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_lshlrev_b32_e32 v6, 16, v14
-; GFX8-NEXT:    v_lshlrev_b32_e32 v7, 16, v13
-; GFX8-NEXT:    v_or_b32_sdwa v6, v17, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_or_b32_sdwa v7, v11, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v17, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v18, v18, v17, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v18, v18
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v17, v18, vcc
+; GFX8-NEXT:    v_min_f16_sdwa v17, v18, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v13
+; GFX8-NEXT:    v_lshrrev_b32_e32 v19, 16, v5
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v18, v18
+; GFX8-NEXT:    v_cndmask_b32_e32 v19, v19, v18, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v19, v19
+; GFX8-NEXT:    v_cndmask_b32_e32 v18, v18, v19, vcc
+; GFX8-NEXT:    v_min_f16_sdwa v18, v19, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_lshrrev_b32_e32 v19, 16, v12
+; GFX8-NEXT:    v_lshrrev_b32_e32 v20, 16, v4
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v19, v19
+; GFX8-NEXT:    v_cndmask_b32_e32 v20, v20, v19, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v20, v20
+; GFX8-NEXT:    v_cndmask_b32_e32 v19, v19, v20, vcc
+; GFX8-NEXT:    v_min_f16_sdwa v19, v20, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_lshrrev_b32_e32 v20, 16, v11
+; GFX8-NEXT:    v_lshrrev_b32_e32 v21, 16, v3
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v20, v20
+; GFX8-NEXT:    v_cndmask_b32_e32 v21, v21, v20, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v21, v21
+; GFX8-NEXT:    v_cndmask_b32_e32 v20, v20, v21, vcc
+; GFX8-NEXT:    v_min_f16_sdwa v20, v21, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_lshrrev_b32_e32 v21, 16, v10
+; GFX8-NEXT:    v_lshrrev_b32_e32 v22, 16, v2
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v21, v21
+; GFX8-NEXT:    v_cndmask_b32_e32 v22, v22, v21, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v22, v22
+; GFX8-NEXT:    v_cndmask_b32_e32 v21, v21, v22, vcc
+; GFX8-NEXT:    v_min_f16_sdwa v21, v22, v21 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_lshrrev_b32_e32 v22, 16, v9
+; GFX8-NEXT:    v_lshrrev_b32_e32 v23, 16, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v22, v22
+; GFX8-NEXT:    v_cndmask_b32_e32 v23, v23, v22, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v23, v23
+; GFX8-NEXT:    v_cndmask_b32_e32 v22, v22, v23, vcc
+; GFX8-NEXT:    v_min_f16_sdwa v22, v23, v22 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_lshrrev_b32_e32 v23, 16, v8
+; GFX8-NEXT:    v_lshrrev_b32_e32 v24, 16, v0
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v23, v23
+; GFX8-NEXT:    v_cndmask_b32_e32 v24, v24, v23, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v24, v24
+; GFX8-NEXT:    v_cndmask_b32_e32 v23, v23, v24, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v15, v15
+; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX8-NEXT:    v_cndmask_b32_e32 v15, v15, v7, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v14, v14
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX8-NEXT:    v_cndmask_b32_e32 v14, v14, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v13, v13
+; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v13, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v13, v13, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v12, v12
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v12, v12, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v11, v11
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v11, v11, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v10, v10
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v10, v10, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX8-NEXT:    v_min_f16_sdwa v23, v24, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_min_f16_e32 v7, v7, v15
+; GFX8-NEXT:    v_min_f16_e32 v6, v6, v14
+; GFX8-NEXT:    v_min_f16_e32 v5, v5, v13
+; GFX8-NEXT:    v_min_f16_e32 v4, v4, v12
+; GFX8-NEXT:    v_min_f16_e32 v3, v3, v11
+; GFX8-NEXT:    v_min_f16_e32 v2, v2, v10
+; GFX8-NEXT:    v_min_f16_e32 v1, v1, v9
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v8
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v23
+; GFX8-NEXT:    v_or_b32_e32 v1, v1, v22
+; GFX8-NEXT:    v_or_b32_e32 v2, v2, v21
+; GFX8-NEXT:    v_or_b32_e32 v3, v3, v20
+; GFX8-NEXT:    v_or_b32_e32 v4, v4, v19
+; GFX8-NEXT:    v_or_b32_e32 v5, v5, v18
+; GFX8-NEXT:    v_or_b32_e32 v6, v6, v17
+; GFX8-NEXT:    v_or_b32_e32 v7, v7, v16
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v16f16:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_pk_min_f16 v16, v7, v15
-; GFX900-NEXT:    v_mov_b32_e32 v17, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v7, v15
-; GFX900-NEXT:    v_cndmask_b32_e32 v18, v17, v16, vcc
-; GFX900-NEXT:    v_lshrrev_b32_e32 v16, 16, v16
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_e32 v7, v17, v16, vcc
-; GFX900-NEXT:    v_pk_min_f16 v15, v6, v14
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v6, v14
-; GFX900-NEXT:    v_cndmask_b32_e32 v16, v17, v15, vcc
-; GFX900-NEXT:    v_lshrrev_b32_e32 v15, 16, v15
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_e32 v6, v17, v15, vcc
-; GFX900-NEXT:    v_pk_min_f16 v14, v5, v13
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v5, v13
-; GFX900-NEXT:    v_cndmask_b32_e32 v15, v17, v14, vcc
-; GFX900-NEXT:    v_lshrrev_b32_e32 v14, 16, v14
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_e32 v5, v17, v14, vcc
-; GFX900-NEXT:    v_pk_min_f16 v13, v4, v12
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v4, v12
-; GFX900-NEXT:    v_cndmask_b32_e32 v14, v17, v13, vcc
-; GFX900-NEXT:    v_lshrrev_b32_e32 v13, 16, v13
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_e32 v4, v17, v13, vcc
-; GFX900-NEXT:    v_pk_min_f16 v12, v3, v11
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v3, v11
-; GFX900-NEXT:    v_cndmask_b32_e32 v13, v17, v12, vcc
-; GFX900-NEXT:    v_lshrrev_b32_e32 v12, 16, v12
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_e32 v3, v17, v12, vcc
-; GFX900-NEXT:    v_pk_min_f16 v11, v2, v10
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v2, v10
-; GFX900-NEXT:    v_cndmask_b32_e32 v12, v17, v11, vcc
-; GFX900-NEXT:    v_lshrrev_b32_e32 v11, 16, v11
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_e32 v2, v17, v11, vcc
-; GFX900-NEXT:    v_pk_min_f16 v10, v1, v9
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v1, v9
-; GFX900-NEXT:    v_cndmask_b32_e32 v11, v17, v10, vcc
-; GFX900-NEXT:    v_lshrrev_b32_e32 v10, 16, v10
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v17, v10, vcc
-; GFX900-NEXT:    v_pk_min_f16 v9, v0, v8
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v8
-; GFX900-NEXT:    v_cndmask_b32_e32 v10, v17, v9, vcc
-; GFX900-NEXT:    v_lshrrev_b32_e32 v9, 16, v9
-; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v17, v9, vcc
+; GFX900-NEXT:    v_lshrrev_b32_e32 v17, 16, v8
+; GFX900-NEXT:    v_lshrrev_b32_e32 v16, 16, v0
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v17, v17
+; GFX900-NEXT:    v_cndmask_b32_e32 v16, v16, v17, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v16, v16
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v17, v16, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
 ; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX900-NEXT:    v_perm_b32 v0, v0, v10, s4
-; GFX900-NEXT:    v_perm_b32 v1, v1, v11, s4
-; GFX900-NEXT:    v_perm_b32 v2, v2, v12, s4
-; GFX900-NEXT:    v_perm_b32 v3, v3, v13, s4
-; GFX900-NEXT:    v_perm_b32 v4, v4, v14, s4
-; GFX900-NEXT:    v_perm_b32 v5, v5, v15, s4
-; GFX900-NEXT:    v_perm_b32 v6, v6, v16, s4
-; GFX900-NEXT:    v_perm_b32 v7, v7, v18, s4
+; GFX900-NEXT:    v_perm_b32 v8, v17, v8, s4
+; GFX900-NEXT:    v_perm_b32 v0, v16, v0, s4
+; GFX900-NEXT:    v_lshrrev_b32_e32 v16, 16, v9
+; GFX900-NEXT:    v_pk_min_f16 v0, v0, v8
+; GFX900-NEXT:    v_lshrrev_b32_e32 v8, 16, v1
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v16, v16
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v16, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v16, v16, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc
+; GFX900-NEXT:    v_perm_b32 v9, v16, v9, s4
+; GFX900-NEXT:    v_perm_b32 v1, v8, v1, s4
+; GFX900-NEXT:    v_pk_min_f16 v1, v1, v9
+; GFX900-NEXT:    v_lshrrev_b32_e32 v9, 16, v10
+; GFX900-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v10, v10
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v10, v10, v2, vcc
+; GFX900-NEXT:    v_perm_b32 v9, v9, v10, s4
+; GFX900-NEXT:    v_perm_b32 v2, v8, v2, s4
+; GFX900-NEXT:    v_pk_min_f16 v2, v2, v9
+; GFX900-NEXT:    v_lshrrev_b32_e32 v9, 16, v11
+; GFX900-NEXT:    v_lshrrev_b32_e32 v8, 16, v3
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v11, v11
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v10, v11, v3, vcc
+; GFX900-NEXT:    v_perm_b32 v9, v9, v10, s4
+; GFX900-NEXT:    v_perm_b32 v3, v8, v3, s4
+; GFX900-NEXT:    v_pk_min_f16 v3, v3, v9
+; GFX900-NEXT:    v_lshrrev_b32_e32 v9, 16, v12
+; GFX900-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v12, v12
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v10, v12, v4, vcc
+; GFX900-NEXT:    v_perm_b32 v9, v9, v10, s4
+; GFX900-NEXT:    v_perm_b32 v4, v8, v4, s4
+; GFX900-NEXT:    v_pk_min_f16 v4, v4, v9
+; GFX900-NEXT:    v_lshrrev_b32_e32 v9, 16, v13
+; GFX900-NEXT:    v_lshrrev_b32_e32 v8, 16, v5
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v13, v13
+; GFX900-NEXT:    v_cndmask_b32_e32 v5, v5, v13, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX900-NEXT:    v_cndmask_b32_e32 v10, v13, v5, vcc
+; GFX900-NEXT:    v_perm_b32 v9, v9, v10, s4
+; GFX900-NEXT:    v_perm_b32 v5, v8, v5, s4
+; GFX900-NEXT:    v_pk_min_f16 v5, v5, v9
+; GFX900-NEXT:    v_lshrrev_b32_e32 v9, 16, v14
+; GFX900-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v14, v14
+; GFX900-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX900-NEXT:    v_cndmask_b32_e32 v10, v14, v6, vcc
+; GFX900-NEXT:    v_perm_b32 v9, v9, v10, s4
+; GFX900-NEXT:    v_perm_b32 v6, v8, v6, s4
+; GFX900-NEXT:    v_pk_min_f16 v6, v6, v9
+; GFX900-NEXT:    v_lshrrev_b32_e32 v9, 16, v15
+; GFX900-NEXT:    v_lshrrev_b32_e32 v8, 16, v7
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v15, v15
+; GFX900-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX900-NEXT:    v_cndmask_b32_e32 v10, v15, v7, vcc
+; GFX900-NEXT:    v_perm_b32 v9, v9, v10, s4
+; GFX900-NEXT:    v_perm_b32 v7, v8, v7, s4
+; GFX900-NEXT:    v_pk_min_f16 v7, v7, v9
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v16f16:
@@ -1942,192 +2453,310 @@ define <16 x half> @v_minimum_v16f16(<16 x half> %src0, <16 x half> %src1) {
 ; GFX10-LABEL: v_minimum_v16f16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_pk_min_f16 v16, v7, v15
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v7, v15
-; GFX10-NEXT:    v_pk_min_f16 v18, v6, v14
-; GFX10-NEXT:    v_pk_min_f16 v19, v3, v11
-; GFX10-NEXT:    v_pk_min_f16 v20, v2, v10
-; GFX10-NEXT:    v_lshrrev_b32_e32 v17, 16, v16
-; GFX10-NEXT:    v_cndmask_b32_e32 v16, 0x7e00, v16, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_lshrrev_b32_e32 v15, 16, v18
-; GFX10-NEXT:    v_pk_min_f16 v21, v0, v8
-; GFX10-NEXT:    v_cndmask_b32_e32 v7, 0x7e00, v17, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v14
-; GFX10-NEXT:    v_pk_min_f16 v17, v5, v13
-; GFX10-NEXT:    v_lshrrev_b32_e32 v23, 16, v21
-; GFX10-NEXT:    v_perm_b32 v7, v7, v16, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v18, 0x7e00, v18, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_lshrrev_b32_e32 v14, 16, v17
-; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v15, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v5, v13
-; GFX10-NEXT:    v_perm_b32 v6, v6, v18, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v15, 0x7e00, v17, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_pk_min_f16 v17, v4, v12
-; GFX10-NEXT:    v_cndmask_b32_e32 v5, 0x7e00, v14, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v12
-; GFX10-NEXT:    v_lshrrev_b32_e32 v14, 16, v17
-; GFX10-NEXT:    v_perm_b32 v5, v5, v15, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v13, 0x7e00, v17, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v11
-; GFX10-NEXT:    v_lshrrev_b32_e32 v17, 16, v19
-; GFX10-NEXT:    v_cndmask_b32_e32 v19, 0x7e00, v19, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_pk_min_f16 v11, v1, v9
-; GFX10-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v17, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v10
-; GFX10-NEXT:    v_lshrrev_b32_e32 v22, 16, v11
-; GFX10-NEXT:    v_perm_b32 v3, v3, v19, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v17, 0x7e00, v20, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v9
-; GFX10-NEXT:    v_lshrrev_b32_e32 v20, 16, v20
-; GFX10-NEXT:    v_cndmask_b32_e32 v11, 0x7e00, v11, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v22, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v8
-; GFX10-NEXT:    v_perm_b32 v1, v1, v11, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v9, 0x7e00, v21, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v23, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_perm_b32 v0, v0, v9, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v20, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_perm_b32 v2, v2, v17, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v14, vcc_lo
-; GFX10-NEXT:    v_perm_b32 v4, v4, v13, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v16, 16, v8
+; GFX10-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
+; GFX10-NEXT:    v_lshrrev_b32_e32 v19, 16, v9
+; GFX10-NEXT:    v_lshrrev_b32_e32 v18, 16, v1
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v16, v16
+; GFX10-NEXT:    v_cndmask_b32_e32 v17, v17, v16, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8, v8
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v17, v17
+; GFX10-NEXT:    v_cndmask_b32_e32 v16, v16, v17, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v19, v19
+; GFX10-NEXT:    v_perm_b32 v0, v17, v0, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v17, 16, v10
+; GFX10-NEXT:    v_perm_b32 v8, v16, v8, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v18, v18, v19, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v16, 16, v2
+; GFX10-NEXT:    v_pk_min_f16 v0, v0, v8
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v18, v18
+; GFX10-NEXT:    v_cndmask_b32_e32 v19, v19, v18, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v17, v17
+; GFX10-NEXT:    v_cndmask_b32_e32 v16, v16, v17, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v10, v10
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v16, v16
+; GFX10-NEXT:    v_perm_b32 v1, v18, v1, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v18, 16, v11
+; GFX10-NEXT:    v_perm_b32 v9, v19, v9, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v17, v17, v16, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_lshrrev_b32_e32 v19, 16, v3
+; GFX10-NEXT:    v_pk_min_f16 v1, v1, v9
+; GFX10-NEXT:    v_lshrrev_b32_e32 v9, 16, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v10, v10, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v18, v18
+; GFX10-NEXT:    v_perm_b32 v2, v16, v2, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v10, v17, v10, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v8, v19, v18, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v17, 16, v5
+; GFX10-NEXT:    v_pk_min_f16 v2, v2, v10
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8, v8
+; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 16, v12
+; GFX10-NEXT:    v_cndmask_b32_e32 v16, v18, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v11, v11
+; GFX10-NEXT:    v_lshrrev_b32_e32 v18, 16, v15
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v10, v10
+; GFX10-NEXT:    v_cndmask_b32_e32 v9, v9, v10, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v12, v12
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v11, v11, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX10-NEXT:    v_perm_b32 v3, v8, v3, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v11, v16, v11, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v10, v10, v9, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_lshrrev_b32_e32 v16, 16, v13
+; GFX10-NEXT:    v_pk_min_f16 v3, v3, v11
+; GFX10-NEXT:    v_cndmask_b32_e32 v12, v12, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v16, v16
+; GFX10-NEXT:    v_perm_b32 v4, v9, v4, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v8, v10, v12, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v9, v17, v16, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v13, v13
+; GFX10-NEXT:    v_lshrrev_b32_e32 v12, 16, v14
+; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 16, v6
+; GFX10-NEXT:    v_lshrrev_b32_e32 v17, 16, v7
+; GFX10-NEXT:    v_pk_min_f16 v4, v4, v8
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v13, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX10-NEXT:    v_cndmask_b32_e32 v16, v16, v9, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v12, v12
+; GFX10-NEXT:    v_cndmask_b32_e32 v10, v10, v12, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_cndmask_b32_e32 v13, v13, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v10, v10
+; GFX10-NEXT:    v_perm_b32 v5, v9, v5, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v13, v16, v13, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v12, v12, v10, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v14, v14
+; GFX10-NEXT:    v_pk_min_f16 v5, v5, v13
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v18, v18
+; GFX10-NEXT:    v_cndmask_b32_e32 v17, v17, v18, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v15, v15
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_cndmask_b32_e32 v14, v14, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v17, v17
+; GFX10-NEXT:    v_perm_b32 v6, v10, v6, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v9, v12, v14, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v18, v18, v17, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_pk_min_f16 v6, v6, v9
+; GFX10-NEXT:    v_cndmask_b32_e32 v15, v15, v7, vcc_lo
+; GFX10-NEXT:    v_perm_b32 v7, v17, v7, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v10, v18, v15, 0x5040100
+; GFX10-NEXT:    v_pk_min_f16 v7, v7, v10
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-TRUE16-LABEL: v_minimum_v16f16:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v7.l, v15.l
-; GFX11-TRUE16-NEXT:    v_pk_min_f16 v16, v7, v15
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v6.l, v14.l
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v6.h, v14.h
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, v5.l, v13.l
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s3, v5.h, v13.h
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v7.l, 0x7e00, v16.l, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v7.h, v15.h
-; GFX11-TRUE16-NEXT:    v_pk_min_f16 v15, v6, v14
-; GFX11-TRUE16-NEXT:    v_pk_min_f16 v14, v5, v13
-; GFX11-TRUE16-NEXT:    v_pk_min_f16 v13, v4, v12
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s4, v1.h, v9.h
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v7.h, 0x7e00, v16.h, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v6.l, 0x7e00, v15.l, s0
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v6.h, 0x7e00, v15.h, s1
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.l, 0x7e00, v14.l, s2
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.h, 0x7e00, v14.h, s3
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4.l, v12.l
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v3.l, v11.l
-; GFX11-TRUE16-NEXT:    v_pk_min_f16 v14, v3, v11
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v2.l, v10.l
-; GFX11-TRUE16-NEXT:    v_pk_min_f16 v15, v2, v10
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.l, 0x7e00, v13.l, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4.h, v12.h
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, 0x7e00, v14.l, s0
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v3.h, v11.h
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, 0x7e00, v15.l, s1
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v1.l, v9.l
-; GFX11-TRUE16-NEXT:    v_pk_min_f16 v11, v1, v9
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, v0.l, v8.l
-; GFX11-TRUE16-NEXT:    v_pk_min_f16 v12, v0, v8
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s3, v0.h, v8.h
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s5, v2.h, v10.h
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v11.l, s1
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x7e00, v11.h, s4
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v12.l, s2
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v12.h, s3
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, 0x7e00, v15.h, s5
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, 0x7e00, v14.h, s0
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.h, 0x7e00, v13.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8.h, v8.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v8.l, v8.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v9.l, v9.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v10.h, v10.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s3, v10.l, v10.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v8.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v8.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, v9.l, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, v2.h, v10.h, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, v2.l, v10.l, s3
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v2.h, v2.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s3, v2.l, v2.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v8.h, v8.h, v0.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9.h, v9.h
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v9.l, v9.l, v1.l, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v10.h, v10.h, v2.h, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v10.l, v10.l, v2.l, s3
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v12.h, v12.h
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v1.h, v9.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v12.l, v12.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s3, v13.h, v13.h
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.h, v4.h, v12.h, s1
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v1.h, v1.h
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v8.l, v8.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v11.h, v11.h
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.l, v4.l, v12.l, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.h, v5.h, v13.h, s3
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v9.h, v9.h, v1.h, s0
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v11.l, v11.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, v3.h, v11.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v4.h, v4.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v4.l, v4.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s3, v5.h, v5.h
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, v3.l, v11.l, s0
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3.h, v3.h
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v0, v0, v8
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v1, v1, v9
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v2, v2, v10
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v3.l, v3.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v8.h, v11.h, v3.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v9.h, v12.h, v4.h, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v9.l, v12.l, v4.l, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v10.h, v13.h, v5.h, s3
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v8.l, v11.l, v3.l, s0
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v13.l, v13.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v14.h, v14.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v14.l, v14.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v15.h, v15.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s3, v15.l, v15.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.l, v5.l, v13.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v6.h, v6.h, v14.h, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v6.l, v6.l, v14.l, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v7.h, v7.h, v15.h, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v7.l, v7.l, v15.l, s3
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5.l, v5.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v6.h, v6.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v6.l, v6.l
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v7.h, v7.h
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s3, v7.l, v7.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v10.l, v13.l, v5.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v11.h, v14.h, v6.h, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v11.l, v14.l, v6.l, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v12.h, v15.h, v7.h, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v12.l, v15.l, v7.l, s3
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v3, v3, v8
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v4, v4, v9
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v5, v5, v10
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v6, v6, v11
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v7, v7, v12
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_minimum_v16f16:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    v_pk_min_f16 v16, v7, v15
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v15
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v7
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v7, v15
-; GFX11-FAKE16-NEXT:    v_pk_min_f16 v15, v6, v14
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v16
-; GFX11-FAKE16-NEXT:    v_pk_min_f16 v20, v4, v12
-; GFX11-FAKE16-NEXT:    v_pk_min_f16 v22, v2, v10
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, 0x7e00, v16, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v18, v17
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v14
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v6
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v23, 16, v8
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v24, 16, v0
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v16, 0x7e00, v19, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v14
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v15
-; GFX11-FAKE16-NEXT:    v_pk_min_f16 v14, v5, v13
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-FAKE16-NEXT:    v_perm_b32 v7, v16, v7, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v15, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v18, v17
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v13
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v5
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v15, 0x7e00, v19, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v5, v13
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v14
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-FAKE16-NEXT:    v_perm_b32 v6, v15, v6, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, 0x7e00, v14, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v18, v17
-; GFX11-FAKE16-NEXT:    v_pk_min_f16 v17, v3, v11
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v20
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v13, 0x7e00, v19, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v12
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v11
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v21, 16, v17
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v12, 16, v12
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v14, 0x7e00, v20, vcc_lo
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v20, 16, v3
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v11
-; GFX11-FAKE16-NEXT:    v_perm_b32 v5, v13, v5, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v17, vcc_lo
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v20, v19
-; GFX11-FAKE16-NEXT:    v_pk_min_f16 v19, v1, v9
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v20, 16, v22
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v11, 0x7e00, v21, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v10
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v10
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v11, v3, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v17, 0x7e00, v22, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v9
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v9
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX11-FAKE16-NEXT:    v_pk_min_f16 v22, v0, v8
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v21, 0x7e00, v19, vcc_lo
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v19
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v16, 16, v8
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v9
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v9
-; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v25, 16, v22
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v19, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v8
-; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v1, v21, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v22, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v24, v23
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v8, 0x7e00, v25, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v10
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v8, v0, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v20, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v12
-; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v2, v17, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v18, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v16, v16
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v17, v17, v16, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8, v8
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v17, v17
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v16, v16, v17, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v19, v19
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v18, v18, v19, vcc_lo
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT:    v_perm_b32 v4, v4, v14, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v18, v18
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v19, v19, v18, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v17, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v10
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v8, v16, v8, 0x5040100
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v16, 16, v2
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v17, v17
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v0, v0, v8
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v16, v16, v17, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v10, v10
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v16, v16
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v18, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v11
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v9, v19, v9, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v17, v17, v16, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v3
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v1, v1, v9
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v10, v10, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v18, v18
+; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v16, v2, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v10, v17, v10, 0x5040100
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v5
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v8, v19, v18, vcc_lo
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v2, v2, v10
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v12
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8, v8
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v16, v18, v8, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v11, v11
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v15
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v10, v10
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v9, v9, v10, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v12, v12
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v11, v11, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v11, v16, v11, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v10, v10, v9, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v16, 16, v13
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v12, v12, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v16, v16
+; GFX11-FAKE16-NEXT:    v_perm_b32 v4, v9, v4, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v9, v17, v16, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v13, v13
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v8, v3, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v8, v10, v12, 0x5040100
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v12, 16, v14
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v6
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, v5, v13, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v7
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v3, v3, v11
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v4, v4, v8
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v16, v16, v9, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v12, v12
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v10, v10, v12, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v13, v13, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v10, v10
+; GFX11-FAKE16-NEXT:    v_perm_b32 v5, v9, v5, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v12, v12, v10, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v14, v14
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v18, v18
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v17, v17, v18, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v15, v15
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX11-FAKE16-NEXT:    v_perm_b32 v13, v16, v13, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v14, v14, v6, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v17, v17
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v5, v5, v13
+; GFX11-FAKE16-NEXT:    v_perm_b32 v9, v12, v14, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v18, v18, v17, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v15, v15, v7, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v6, v10, v6, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v7, v17, v7, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v10, v18, v15, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v6, v6, v9
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v7, v7, v10
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v16f16:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
index 3e98599fc4c7f..d6b60bf25b2a7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
@@ -12,28 +12,31 @@ define float @v_minimum_f32(float %src0, float %src1) {
 ; GFX7-LABEL: v_minimum_f32:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX7-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_f32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_f32:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_f32:
@@ -45,18 +48,22 @@ define float @v_minimum_f32(float %src0, float %src1) {
 ; GFX10-LABEL: v_minimum_f32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_f32:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_f32:
@@ -120,28 +127,31 @@ define float @v_minimum_f32__nsz(float %src0, float %src1) {
 ; GFX7-LABEL: v_minimum_f32__nsz:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX7-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_f32__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_f32__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_f32__nsz:
@@ -153,18 +163,22 @@ define float @v_minimum_f32__nsz(float %src0, float %src1) {
 ; GFX10-LABEL: v_minimum_f32__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_f32__nsz:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_f32__nsz:
@@ -229,30 +243,33 @@ define float @v_minimum_f32__nnan_src0(float %arg0, float %src1) {
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_add_f32_e32 v0, 1.0, v0
-; GFX7-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX7-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_f32__nnan_src0:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_add_f32_e32 v0, 1.0, v0
-; GFX8-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_f32__nnan_src0:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_add_f32_e32 v0, 1.0, v0
-; GFX900-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_f32__nnan_src0:
@@ -266,19 +283,24 @@ define float @v_minimum_f32__nnan_src0(float %arg0, float %src1) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_add_f32_e32 v0, 1.0, v0
-; GFX10-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_f32__nnan_src0:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_add_f32_e32 v0, 1.0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_f32__nnan_src0:
@@ -302,30 +324,27 @@ define float @v_minimum_f32__nnan_src1(float %src0, float %arg1) {
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_add_f32_e32 v1, 1.0, v1
-; GFX7-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX7-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_f32__nnan_src1:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_add_f32_e32 v1, 1.0, v1
-; GFX8-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_f32__nnan_src1:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_add_f32_e32 v1, 1.0, v1
-; GFX900-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_f32__nnan_src1:
@@ -339,19 +358,19 @@ define float @v_minimum_f32__nnan_src1(float %src0, float %arg1) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_add_f32_e32 v1, 1.0, v1
-; GFX10-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_f32__nnan_src1:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_add_f32_e32 v1, 1.0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_f32__nnan_src1:
@@ -374,11 +393,13 @@ define void @s_minimum_f32(float inreg %src0, float inreg %src1) {
 ; GFX7-LABEL: s_minimum_f32:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_mov_b32_e32 v0, s17
-; GFX7-NEXT:    v_min_f32_e32 v1, s16, v0
-; GFX7-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX7-NEXT:    v_mov_b32_e32 v0, s16
+; GFX7-NEXT:    v_mov_b32_e32 v1, s17
+; GFX7-NEXT:    v_cmp_u_f32_e64 vcc, s17, s17
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    ;;#ASMSTART
 ; GFX7-NEXT:    ; use v0
 ; GFX7-NEXT:    ;;#ASMEND
@@ -387,11 +408,13 @@ define void @s_minimum_f32(float inreg %src0, float inreg %src1) {
 ; GFX8-LABEL: s_minimum_f32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, s17
-; GFX8-NEXT:    v_min_f32_e32 v1, s16, v0
-; GFX8-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX8-NEXT:    v_mov_b32_e32 v0, s16
+; GFX8-NEXT:    v_mov_b32_e32 v1, s17
+; GFX8-NEXT:    v_cmp_u_f32_e64 vcc, s17, s17
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    ;;#ASMSTART
 ; GFX8-NEXT:    ; use v0
 ; GFX8-NEXT:    ;;#ASMEND
@@ -400,11 +423,13 @@ define void @s_minimum_f32(float inreg %src0, float inreg %src1) {
 ; GFX900-LABEL: s_minimum_f32:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_mov_b32_e32 v0, s17
-; GFX900-NEXT:    v_min_f32_e32 v1, s16, v0
-; GFX900-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT:    v_mov_b32_e32 v0, s16
+; GFX900-NEXT:    v_mov_b32_e32 v1, s17
+; GFX900-NEXT:    v_cmp_u_f32_e64 vcc, s17, s17
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; use v0
 ; GFX900-NEXT:    ;;#ASMEND
@@ -423,9 +448,12 @@ define void @s_minimum_f32(float inreg %src0, float inreg %src1) {
 ; GFX10-LABEL: s_minimum_f32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f32_e64 v0, s16, s17
-; GFX10-NEXT:    v_cmp_o_f32_e64 vcc_lo, s16, s17
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
+; GFX10-NEXT:    v_mov_b32_e32 v0, s17
+; GFX10-NEXT:    v_cmp_u_f32_e64 vcc_lo, s17, s17
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, s16, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, s17, v0, vcc_lo
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ; use v0
 ; GFX10-NEXT:    ;;#ASMEND
@@ -434,10 +462,14 @@ define void @s_minimum_f32(float inreg %src0, float inreg %src1) {
 ; GFX11-LABEL: s_minimum_f32:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f32_e64 v0, s0, s1
-; GFX11-NEXT:    v_cmp_o_f32_e64 vcc_lo, s0, s1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
+; GFX11-NEXT:    v_mov_b32_e32 v0, s1
+; GFX11-NEXT:    v_cmp_u_f32_e64 vcc_lo, s1, s1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, s0, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, s1, v0, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX11-NEXT:    ;;#ASMSTART
 ; GFX11-NEXT:    ; use v0
 ; GFX11-NEXT:    ;;#ASMEND
@@ -465,37 +497,46 @@ define <2 x float> @v_minimum_v2f32(<2 x float> %src0, <2 x float> %src1) {
 ; GFX7-LABEL: v_minimum_v2f32:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f32_e32 v4, v0, v2
-; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX7-NEXT:    v_min_f32_e32 v2, v1, v3
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX7-NEXT:    v_min_f32_e32 v1, v1, v2
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_v2f32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f32_e32 v4, v0, v2
-; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX8-NEXT:    v_min_f32_e32 v2, v1, v3
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX8-NEXT:    v_min_f32_e32 v1, v1, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v2f32:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_min_f32_e32 v4, v0, v2
-; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX900-NEXT:    v_min_f32_e32 v2, v1, v3
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX900-NEXT:    v_min_f32_e32 v0, v0, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX900-NEXT:    v_min_f32_e32 v1, v1, v2
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v2f32:
@@ -508,23 +549,32 @@ define <2 x float> @v_minimum_v2f32(<2 x float> %src0, <2 x float> %src1) {
 ; GFX10-LABEL: v_minimum_v2f32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f32_e32 v4, v0, v2
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT:    v_min_f32_e32 v5, v1, v3
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v3
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_min_f32_e32 v1, v1, v3
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_v2f32:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_min_f32 v4, v0, v2 :: v_dual_min_f32 v5, v1, v3
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v1 :: v_dual_min_f32 v0, v0, v2
+; GFX11-NEXT:    v_min_f32_e32 v1, v1, v3
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v2f32:
@@ -594,37 +644,46 @@ define <2 x float> @v_minimum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
 ; GFX7-LABEL: v_minimum_v2f32__nsz:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f32_e32 v4, v0, v2
-; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX7-NEXT:    v_min_f32_e32 v2, v1, v3
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX7-NEXT:    v_min_f32_e32 v1, v1, v2
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_v2f32__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f32_e32 v4, v0, v2
-; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX8-NEXT:    v_min_f32_e32 v2, v1, v3
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX8-NEXT:    v_min_f32_e32 v1, v1, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v2f32__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_min_f32_e32 v4, v0, v2
-; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX900-NEXT:    v_min_f32_e32 v2, v1, v3
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX900-NEXT:    v_min_f32_e32 v0, v0, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX900-NEXT:    v_min_f32_e32 v1, v1, v2
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v2f32__nsz:
@@ -637,23 +696,32 @@ define <2 x float> @v_minimum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
 ; GFX10-LABEL: v_minimum_v2f32__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f32_e32 v4, v0, v2
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT:    v_min_f32_e32 v5, v1, v3
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v3
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_min_f32_e32 v1, v1, v3
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_v2f32__nsz:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_min_f32 v4, v0, v2 :: v_dual_min_f32 v5, v1, v3
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v1 :: v_dual_min_f32 v0, v0, v2
+; GFX11-NEXT:    v_min_f32_e32 v1, v1, v3
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v2f32__nsz:
@@ -723,15 +791,20 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
 ; GFX7-LABEL: s_minimum_v2f32:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_mov_b32_e32 v0, s19
-; GFX7-NEXT:    v_min_f32_e32 v1, s17, v0
-; GFX7-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, s17, v0
-; GFX7-NEXT:    v_mov_b32_e32 v0, s18
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v3, s16, v0
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX7-NEXT:    v_mov_b32_e32 v0, s17
+; GFX7-NEXT:    v_mov_b32_e32 v1, s19
+; GFX7-NEXT:    v_cmp_u_f32_e64 vcc, s19, s19
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v1, v0, v1
+; GFX7-NEXT:    v_mov_b32_e32 v0, s16
+; GFX7-NEXT:    v_mov_b32_e32 v2, s18
+; GFX7-NEXT:    v_cmp_u_f32_e64 vcc, s18, s18
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v2
 ; GFX7-NEXT:    ;;#ASMSTART
 ; GFX7-NEXT:    ; use v[0:1]
 ; GFX7-NEXT:    ;;#ASMEND
@@ -740,15 +813,20 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
 ; GFX8-LABEL: s_minimum_v2f32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, s19
-; GFX8-NEXT:    v_min_f32_e32 v1, s17, v0
-; GFX8-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, s17, v0
-; GFX8-NEXT:    v_mov_b32_e32 v0, s18
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX8-NEXT:    v_min_f32_e32 v3, s16, v0
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX8-NEXT:    v_mov_b32_e32 v0, s17
+; GFX8-NEXT:    v_mov_b32_e32 v1, s19
+; GFX8-NEXT:    v_cmp_u_f32_e64 vcc, s19, s19
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v1, v0, v1
+; GFX8-NEXT:    v_mov_b32_e32 v0, s16
+; GFX8-NEXT:    v_mov_b32_e32 v2, s18
+; GFX8-NEXT:    v_cmp_u_f32_e64 vcc, s18, s18
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v2
 ; GFX8-NEXT:    ;;#ASMSTART
 ; GFX8-NEXT:    ; use v[0:1]
 ; GFX8-NEXT:    ;;#ASMEND
@@ -757,15 +835,20 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
 ; GFX900-LABEL: s_minimum_v2f32:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_mov_b32_e32 v0, s19
-; GFX900-NEXT:    v_min_f32_e32 v1, s17, v0
-; GFX900-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, s17, v0
-; GFX900-NEXT:    v_mov_b32_e32 v0, s18
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX900-NEXT:    v_min_f32_e32 v3, s16, v0
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX900-NEXT:    v_mov_b32_e32 v0, s17
+; GFX900-NEXT:    v_mov_b32_e32 v1, s19
+; GFX900-NEXT:    v_cmp_u_f32_e64 vcc, s19, s19
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_min_f32_e32 v1, v0, v1
+; GFX900-NEXT:    v_mov_b32_e32 v0, s16
+; GFX900-NEXT:    v_mov_b32_e32 v2, s18
+; GFX900-NEXT:    v_cmp_u_f32_e64 vcc, s18, s18
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX900-NEXT:    v_min_f32_e32 v0, v0, v2
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; use v[0:1]
 ; GFX900-NEXT:    ;;#ASMEND
@@ -786,12 +869,18 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
 ; GFX10-LABEL: s_minimum_v2f32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f32_e64 v0, s17, s19
-; GFX10-NEXT:    v_cmp_o_f32_e64 vcc_lo, s17, s19
-; GFX10-NEXT:    v_min_f32_e64 v2, s16, s18
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v0, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e64 vcc_lo, s16, s18
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX10-NEXT:    v_mov_b32_e32 v0, s19
+; GFX10-NEXT:    v_cmp_u_f32_e64 vcc_lo, s19, s19
+; GFX10-NEXT:    v_mov_b32_e32 v1, s18
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, s17, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e64 vcc_lo, s18, s18
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, s16, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, s19, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_min_f32_e32 v1, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, s18, v2, vcc_lo
+; GFX10-NEXT:    v_min_f32_e32 v0, v2, v3
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ; use v[0:1]
 ; GFX10-NEXT:    ;;#ASMEND
@@ -800,13 +889,20 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
 ; GFX11-LABEL: s_minimum_v2f32:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f32_e64 v0, s1, s3
-; GFX11-NEXT:    v_cmp_o_f32_e64 vcc_lo, s1, s3
-; GFX11-NEXT:    v_min_f32_e64 v2, s0, s2
+; GFX11-NEXT:    v_dual_mov_b32 v0, s3 :: v_dual_mov_b32 v1, s2
+; GFX11-NEXT:    v_cmp_u_f32_e64 vcc_lo, s3, s3
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, s1, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e64 vcc_lo, s2, s2
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, s0, v1, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v0, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e64 vcc_lo, s0, s2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, s3, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_min_f32_e32 v1, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v3, s2, v2, vcc_lo
+; GFX11-NEXT:    v_min_f32_e32 v0, v2, v3
 ; GFX11-NEXT:    ;;#ASMSTART
 ; GFX11-NEXT:    ; use v[0:1]
 ; GFX11-NEXT:    ;;#ASMEND
@@ -835,46 +931,61 @@ define <3 x float> @v_minimum_v3f32(<3 x float> %src0, <3 x float> %src1) {
 ; GFX7-LABEL: v_minimum_v3f32:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f32_e32 v6, v0, v3
-; GFX7-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX7-NEXT:    v_min_f32_e32 v3, v1, v4
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX7-NEXT:    v_min_f32_e32 v3, v2, v5
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX7-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX7-NEXT:    v_min_f32_e32 v2, v2, v3
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_v3f32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f32_e32 v6, v0, v3
-; GFX8-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX8-NEXT:    v_min_f32_e32 v3, v1, v4
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX8-NEXT:    v_min_f32_e32 v3, v2, v5
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX8-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX8-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX8-NEXT:    v_min_f32_e32 v2, v2, v3
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v3f32:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_min_f32_e32 v6, v0, v3
-; GFX900-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX900-NEXT:    v_min_f32_e32 v3, v1, v4
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX900-NEXT:    v_min_f32_e32 v3, v2, v5
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX900-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX900-NEXT:    v_min_f32_e32 v0, v0, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX900-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX900-NEXT:    v_min_f32_e32 v2, v2, v3
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v3f32:
@@ -888,29 +999,40 @@ define <3 x float> @v_minimum_v3f32(<3 x float> %src0, <3 x float> %src1) {
 ; GFX10-LABEL: v_minimum_v3f32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f32_e32 v6, v0, v3
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT:    v_min_f32_e32 v7, v1, v4
-; GFX10-NEXT:    v_min_f32_e32 v8, v2, v5
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v4
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v7, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v5
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_min_f32_e32 v1, v1, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v2, vcc_lo
+; GFX10-NEXT:    v_min_f32_e32 v2, v2, v5
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_v3f32:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_min_f32 v6, v0, v3 :: v_dual_min_f32 v7, v1, v4
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT:    v_dual_min_f32 v8, v2, v5 :: v_dual_cndmask_b32 v1, 0x7fc00000, v7
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v5, v5, v2 :: v_dual_min_f32 v0, v0, v3
+; GFX11-NEXT:    v_dual_min_f32 v1, v1, v4 :: v_dual_min_f32 v2, v2, v5
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v3f32:
@@ -987,46 +1109,61 @@ define <3 x float> @v_minimum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
 ; GFX7-LABEL: v_minimum_v3f32__nsz:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f32_e32 v6, v0, v3
-; GFX7-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX7-NEXT:    v_min_f32_e32 v3, v1, v4
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX7-NEXT:    v_min_f32_e32 v3, v2, v5
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX7-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX7-NEXT:    v_min_f32_e32 v2, v2, v3
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_v3f32__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f32_e32 v6, v0, v3
-; GFX8-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX8-NEXT:    v_min_f32_e32 v3, v1, v4
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX8-NEXT:    v_min_f32_e32 v3, v2, v5
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX8-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX8-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX8-NEXT:    v_min_f32_e32 v2, v2, v3
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v3f32__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_min_f32_e32 v6, v0, v3
-; GFX900-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX900-NEXT:    v_min_f32_e32 v3, v1, v4
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX900-NEXT:    v_min_f32_e32 v3, v2, v5
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX900-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX900-NEXT:    v_min_f32_e32 v0, v0, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX900-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX900-NEXT:    v_min_f32_e32 v2, v2, v3
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v3f32__nsz:
@@ -1040,29 +1177,40 @@ define <3 x float> @v_minimum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
 ; GFX10-LABEL: v_minimum_v3f32__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f32_e32 v6, v0, v3
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT:    v_min_f32_e32 v7, v1, v4
-; GFX10-NEXT:    v_min_f32_e32 v8, v2, v5
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v4
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v7, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v5
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_min_f32_e32 v1, v1, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v2, vcc_lo
+; GFX10-NEXT:    v_min_f32_e32 v2, v2, v5
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_v3f32__nsz:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_min_f32 v6, v0, v3 :: v_dual_min_f32 v7, v1, v4
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT:    v_dual_min_f32 v8, v2, v5 :: v_dual_cndmask_b32 v1, 0x7fc00000, v7
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v5, v5, v2 :: v_dual_min_f32 v0, v0, v3
+; GFX11-NEXT:    v_dual_min_f32 v1, v1, v4 :: v_dual_min_f32 v2, v2, v5
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v3f32__nsz:
@@ -1139,55 +1287,76 @@ define <4 x float> @v_minimum_v4f32(<4 x float> %src0, <4 x float> %src1) {
 ; GFX7-LABEL: v_minimum_v4f32:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f32_e32 v8, v0, v4
-; GFX7-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX7-NEXT:    v_min_f32_e32 v4, v1, v5
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX7-NEXT:    v_min_f32_e32 v4, v2, v6
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
-; GFX7-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX7-NEXT:    v_min_f32_e32 v4, v3, v7
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
-; GFX7-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_min_f32_e32 v1, v1, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_min_f32_e32 v2, v2, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX7-NEXT:    v_min_f32_e32 v3, v3, v4
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_v4f32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f32_e32 v8, v0, v4
-; GFX8-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX8-NEXT:    v_min_f32_e32 v4, v1, v5
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX8-NEXT:    v_min_f32_e32 v4, v2, v6
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
-; GFX8-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX8-NEXT:    v_min_f32_e32 v4, v3, v7
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
-; GFX8-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX8-NEXT:    v_min_f32_e32 v1, v1, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX8-NEXT:    v_min_f32_e32 v2, v2, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX8-NEXT:    v_min_f32_e32 v3, v3, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v4f32:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_min_f32_e32 v8, v0, v4
-; GFX900-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX900-NEXT:    v_min_f32_e32 v4, v1, v5
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX900-NEXT:    v_min_f32_e32 v4, v2, v6
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
-; GFX900-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX900-NEXT:    v_min_f32_e32 v4, v3, v7
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
-; GFX900-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX900-NEXT:    v_min_f32_e32 v0, v0, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX900-NEXT:    v_min_f32_e32 v1, v1, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX900-NEXT:    v_min_f32_e32 v2, v2, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX900-NEXT:    v_min_f32_e32 v3, v3, v4
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v4f32:
@@ -1202,35 +1371,52 @@ define <4 x float> @v_minimum_v4f32(<4 x float> %src0, <4 x float> %src1) {
 ; GFX10-LABEL: v_minimum_v4f32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f32_e32 v8, v0, v4
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT:    v_min_f32_e32 v9, v1, v5
-; GFX10-NEXT:    v_min_f32_e32 v4, v2, v6
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT:    v_min_f32_e32 v8, v3, v7
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v9, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT:    v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_min_f32_e32 v1, v1, v5
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_min_f32_e32 v2, v2, v6
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v3, vcc_lo
+; GFX10-NEXT:    v_min_f32_e32 v3, v3, v7
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_v4f32:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_min_f32 v8, v0, v4 :: v_dual_min_f32 v9, v1, v5
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT:    v_min_f32_e32 v4, v2, v6
-; GFX11-NEXT:    v_dual_min_f32 v8, v3, v7 :: v_dual_cndmask_b32 v1, 0x7fc00000, v9
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v6, v6
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v7, v7
+; GFX11-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_dual_min_f32 v0, v0, v4 :: v_dual_min_f32 v1, v1, v5
+; GFX11-NEXT:    v_cndmask_b32_e32 v6, v6, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX11-NEXT:    v_dual_cndmask_b32 v7, v7, v3 :: v_dual_min_f32 v2, v2, v6
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_min_f32_e32 v3, v3, v7
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v4f32:
@@ -1313,55 +1499,76 @@ define <4 x float> @v_minimum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
 ; GFX7-LABEL: v_minimum_v4f32__nsz:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f32_e32 v8, v0, v4
-; GFX7-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX7-NEXT:    v_min_f32_e32 v4, v1, v5
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX7-NEXT:    v_min_f32_e32 v4, v2, v6
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
-; GFX7-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX7-NEXT:    v_min_f32_e32 v4, v3, v7
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
-; GFX7-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_min_f32_e32 v1, v1, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_min_f32_e32 v2, v2, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX7-NEXT:    v_min_f32_e32 v3, v3, v4
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_v4f32__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f32_e32 v8, v0, v4
-; GFX8-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX8-NEXT:    v_min_f32_e32 v4, v1, v5
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX8-NEXT:    v_min_f32_e32 v4, v2, v6
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
-; GFX8-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX8-NEXT:    v_min_f32_e32 v4, v3, v7
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
-; GFX8-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX8-NEXT:    v_min_f32_e32 v1, v1, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX8-NEXT:    v_min_f32_e32 v2, v2, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX8-NEXT:    v_min_f32_e32 v3, v3, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v4f32__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_min_f32_e32 v8, v0, v4
-; GFX900-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX900-NEXT:    v_min_f32_e32 v4, v1, v5
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX900-NEXT:    v_min_f32_e32 v4, v2, v6
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
-; GFX900-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX900-NEXT:    v_min_f32_e32 v4, v3, v7
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
-; GFX900-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX900-NEXT:    v_min_f32_e32 v0, v0, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX900-NEXT:    v_min_f32_e32 v1, v1, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX900-NEXT:    v_min_f32_e32 v2, v2, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX900-NEXT:    v_min_f32_e32 v3, v3, v4
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v4f32__nsz:
@@ -1376,35 +1583,52 @@ define <4 x float> @v_minimum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
 ; GFX10-LABEL: v_minimum_v4f32__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f32_e32 v8, v0, v4
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT:    v_min_f32_e32 v9, v1, v5
-; GFX10-NEXT:    v_min_f32_e32 v4, v2, v6
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT:    v_min_f32_e32 v8, v3, v7
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v9, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT:    v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_min_f32_e32 v1, v1, v5
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_min_f32_e32 v2, v2, v6
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v3, vcc_lo
+; GFX10-NEXT:    v_min_f32_e32 v3, v3, v7
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_v4f32__nsz:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_min_f32 v8, v0, v4 :: v_dual_min_f32 v9, v1, v5
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT:    v_min_f32_e32 v4, v2, v6
-; GFX11-NEXT:    v_dual_min_f32 v8, v3, v7 :: v_dual_cndmask_b32 v1, 0x7fc00000, v9
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v6, v6
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v7, v7
+; GFX11-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_dual_min_f32 v0, v0, v4 :: v_dual_min_f32 v1, v1, v5
+; GFX11-NEXT:    v_cndmask_b32_e32 v6, v6, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX11-NEXT:    v_dual_cndmask_b32 v7, v7, v3 :: v_dual_min_f32 v2, v2, v6
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_min_f32_e32 v3, v3, v7
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v4f32__nsz:
@@ -1487,91 +1711,136 @@ define <8 x float> @v_minimum_v8f32(<8 x float> %src0, <8 x float> %src1) {
 ; GFX7-LABEL: v_minimum_v8f32:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f32_e32 v16, v0, v8
-; GFX7-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v8
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX7-NEXT:    v_min_f32_e32 v8, v1, v9
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v9
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v17, v8, vcc
-; GFX7-NEXT:    v_min_f32_e32 v8, v2, v10
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v10
-; GFX7-NEXT:    v_cndmask_b32_e32 v2, v17, v8, vcc
-; GFX7-NEXT:    v_min_f32_e32 v8, v3, v11
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v3, v11
-; GFX7-NEXT:    v_cndmask_b32_e32 v3, v17, v8, vcc
-; GFX7-NEXT:    v_min_f32_e32 v8, v4, v12
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v4, v12
-; GFX7-NEXT:    v_cndmask_b32_e32 v4, v17, v8, vcc
-; GFX7-NEXT:    v_min_f32_e32 v8, v5, v13
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v5, v13
-; GFX7-NEXT:    v_cndmask_b32_e32 v5, v17, v8, vcc
-; GFX7-NEXT:    v_min_f32_e32 v8, v6, v14
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v6, v14
-; GFX7-NEXT:    v_cndmask_b32_e32 v6, v17, v8, vcc
-; GFX7-NEXT:    v_min_f32_e32 v8, v7, v15
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v7, v15
-; GFX7-NEXT:    v_cndmask_b32_e32 v7, v17, v8, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v9, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_min_f32_e32 v1, v1, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v10, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_min_f32_e32 v2, v2, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v11, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_min_f32_e32 v3, v3, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v12, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
+; GFX7-NEXT:    v_cndmask_b32_e32 v5, v5, v13, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_min_f32_e32 v4, v4, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v13, v5, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
+; GFX7-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX7-NEXT:    v_min_f32_e32 v5, v5, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
+; GFX7-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX7-NEXT:    v_min_f32_e32 v6, v6, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v15, v7, vcc
+; GFX7-NEXT:    v_min_f32_e32 v7, v7, v8
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_v8f32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f32_e32 v16, v0, v8
-; GFX8-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v8
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX8-NEXT:    v_min_f32_e32 v8, v1, v9
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v1, v9
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v17, v8, vcc
-; GFX8-NEXT:    v_min_f32_e32 v8, v2, v10
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v2, v10
-; GFX8-NEXT:    v_cndmask_b32_e32 v2, v17, v8, vcc
-; GFX8-NEXT:    v_min_f32_e32 v8, v3, v11
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v3, v11
-; GFX8-NEXT:    v_cndmask_b32_e32 v3, v17, v8, vcc
-; GFX8-NEXT:    v_min_f32_e32 v8, v4, v12
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v4, v12
-; GFX8-NEXT:    v_cndmask_b32_e32 v4, v17, v8, vcc
-; GFX8-NEXT:    v_min_f32_e32 v8, v5, v13
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v5, v13
-; GFX8-NEXT:    v_cndmask_b32_e32 v5, v17, v8, vcc
-; GFX8-NEXT:    v_min_f32_e32 v8, v6, v14
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v6, v14
-; GFX8-NEXT:    v_cndmask_b32_e32 v6, v17, v8, vcc
-; GFX8-NEXT:    v_min_f32_e32 v8, v7, v15
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v7, v15
-; GFX8-NEXT:    v_cndmask_b32_e32 v7, v17, v8, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v8
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v9, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX8-NEXT:    v_min_f32_e32 v1, v1, v8
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v10, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX8-NEXT:    v_min_f32_e32 v2, v2, v8
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v11, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX8-NEXT:    v_min_f32_e32 v3, v3, v8
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v12, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
+; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v13, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX8-NEXT:    v_min_f32_e32 v4, v4, v8
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v13, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX8-NEXT:    v_min_f32_e32 v5, v5, v8
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
+; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX8-NEXT:    v_min_f32_e32 v6, v6, v8
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v15, v7, vcc
+; GFX8-NEXT:    v_min_f32_e32 v7, v7, v8
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v8f32:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_min_f32_e32 v16, v0, v8
-; GFX900-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v8
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX900-NEXT:    v_min_f32_e32 v8, v1, v9
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v1, v9
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v17, v8, vcc
-; GFX900-NEXT:    v_min_f32_e32 v8, v2, v10
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v2, v10
-; GFX900-NEXT:    v_cndmask_b32_e32 v2, v17, v8, vcc
-; GFX900-NEXT:    v_min_f32_e32 v8, v3, v11
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v3, v11
-; GFX900-NEXT:    v_cndmask_b32_e32 v3, v17, v8, vcc
-; GFX900-NEXT:    v_min_f32_e32 v8, v4, v12
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v4, v12
-; GFX900-NEXT:    v_cndmask_b32_e32 v4, v17, v8, vcc
-; GFX900-NEXT:    v_min_f32_e32 v8, v5, v13
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v5, v13
-; GFX900-NEXT:    v_cndmask_b32_e32 v5, v17, v8, vcc
-; GFX900-NEXT:    v_min_f32_e32 v8, v6, v14
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v6, v14
-; GFX900-NEXT:    v_cndmask_b32_e32 v6, v17, v8, vcc
-; GFX900-NEXT:    v_min_f32_e32 v8, v7, v15
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v7, v15
-; GFX900-NEXT:    v_cndmask_b32_e32 v7, v17, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX900-NEXT:    v_min_f32_e32 v0, v0, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v9, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX900-NEXT:    v_min_f32_e32 v1, v1, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v10, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX900-NEXT:    v_min_f32_e32 v2, v2, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v11, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX900-NEXT:    v_min_f32_e32 v3, v3, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v12, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
+; GFX900-NEXT:    v_cndmask_b32_e32 v5, v5, v13, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX900-NEXT:    v_min_f32_e32 v4, v4, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v13, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
+; GFX900-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX900-NEXT:    v_min_f32_e32 v5, v5, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
+; GFX900-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX900-NEXT:    v_min_f32_e32 v6, v6, v8
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v15, v7, vcc
+; GFX900-NEXT:    v_min_f32_e32 v7, v7, v8
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v8f32:
@@ -1590,57 +1859,94 @@ define <8 x float> @v_minimum_v8f32(<8 x float> %src0, <8 x float> %src1) {
 ; GFX10-LABEL: v_minimum_v8f32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f32_e32 v16, v0, v8
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v8
-; GFX10-NEXT:    v_min_f32_e32 v17, v1, v9
-; GFX10-NEXT:    v_min_f32_e32 v8, v2, v10
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v16, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v9
-; GFX10-NEXT:    v_min_f32_e32 v9, v3, v11
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v17, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v10
-; GFX10-NEXT:    v_min_f32_e32 v10, v7, v15
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v3, v11
-; GFX10-NEXT:    v_min_f32_e32 v8, v4, v12
-; GFX10-NEXT:    v_cndmask_b32_e32 v3, 0x7fc00000, v9, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v4, v12
-; GFX10-NEXT:    v_min_f32_e32 v9, v5, v13
-; GFX10-NEXT:    v_cndmask_b32_e32 v4, 0x7fc00000, v8, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v5, v13
-; GFX10-NEXT:    v_min_f32_e32 v8, v6, v14
-; GFX10-NEXT:    v_cndmask_b32_e32 v5, 0x7fc00000, v9, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v6, v14
-; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0x7fc00000, v8, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v7, v15
-; GFX10-NEXT:    v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v8, v8
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v9, v9
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v8
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v10, v10
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v8, v9, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_min_f32_e32 v1, v1, v8
+; GFX10-NEXT:    v_cndmask_b32_e32 v9, v10, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v11, v11
+; GFX10-NEXT:    v_min_f32_e32 v2, v2, v9
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v12, v12
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v8, v11, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_min_f32_e32 v3, v3, v8
+; GFX10-NEXT:    v_cndmask_b32_e32 v9, v12, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v13, v13
+; GFX10-NEXT:    v_min_f32_e32 v4, v4, v9
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v13, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v14, v14
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v15, v15
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_cndmask_b32_e32 v10, v13, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_min_f32_e32 v5, v5, v10
+; GFX10-NEXT:    v_cndmask_b32_e32 v11, v14, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_min_f32_e32 v6, v6, v11
+; GFX10-NEXT:    v_cndmask_b32_e32 v12, v15, v7, vcc_lo
+; GFX10-NEXT:    v_min_f32_e32 v7, v7, v12
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_v8f32:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_min_f32 v16, v0, v8 :: v_dual_min_f32 v17, v1, v9
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v8
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v16, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v9
-; GFX11-NEXT:    v_dual_min_f32 v8, v2, v10 :: v_dual_min_f32 v9, v3, v11
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v17, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v10
-; GFX11-NEXT:    v_min_f32_e32 v10, v7, v15
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v3, v11
-; GFX11-NEXT:    v_dual_min_f32 v8, v4, v12 :: v_dual_cndmask_b32 v3, 0x7fc00000, v9
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v4, v12
-; GFX11-NEXT:    v_dual_min_f32 v9, v5, v13 :: v_dual_cndmask_b32 v4, 0x7fc00000, v8
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v5, v13
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_dual_min_f32 v8, v6, v14 :: v_dual_cndmask_b32 v5, 0x7fc00000, v9
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v6, v14
-; GFX11-NEXT:    v_cndmask_b32_e32 v6, 0x7fc00000, v8, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v7, v15
-; GFX11-NEXT:    v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v8, v8
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v9, v9
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v10, v10
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v8
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v8, v9, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    v_cndmask_b32_e32 v9, v10, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v11, v11
+; GFX11-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v12, v12
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_dual_min_f32 v1, v1, v8 :: v_dual_min_f32 v2, v2, v9
+; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX11-NEXT:    v_cndmask_b32_e32 v8, v11, v3, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX11-NEXT:    v_cndmask_b32_e32 v9, v12, v4, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v13, v13
+; GFX11-NEXT:    v_cndmask_b32_e32 v5, v5, v13, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v14, v14
+; GFX11-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v15, v15
+; GFX11-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX11-NEXT:    v_min_f32_e32 v4, v4, v9
+; GFX11-NEXT:    v_cndmask_b32_e32 v10, v13, v5, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v6, v6
+; GFX11-NEXT:    v_cndmask_b32_e32 v11, v14, v6, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v7, v7
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_min_f32 v3, v3, v8 :: v_dual_min_f32 v6, v6, v11
+; GFX11-NEXT:    v_cndmask_b32_e32 v12, v15, v7, vcc_lo
+; GFX11-NEXT:    v_min_f32_e32 v7, v7, v12
+; GFX11-NEXT:    v_min_f32_e32 v5, v5, v10
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v8f32:
@@ -1667,169 +1973,262 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
 ; GFX7-LABEL: v_minimum_v16f32:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v16
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v16, v16
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v16, v16, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v17, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX7-NEXT:    v_min_f32_e32 v0, v0, v16
+; GFX7-NEXT:    v_cndmask_b32_e32 v16, v17, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v18, v18
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v18, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_min_f32_e32 v1, v1, v16
+; GFX7-NEXT:    v_cndmask_b32_e32 v16, v18, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v19, v19
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v19, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_min_f32_e32 v2, v2, v16
+; GFX7-NEXT:    v_cndmask_b32_e32 v16, v19, v3, vcc
+; GFX7-NEXT:    v_min_f32_e32 v3, v3, v16
 ; GFX7-NEXT:    buffer_load_dword v16, off, s[0:3], s32
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[4:5], v1, v17
-; GFX7-NEXT:    v_min_f32_e32 v1, v1, v17
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[6:7], v2, v18
-; GFX7-NEXT:    v_min_f32_e32 v2, v2, v18
-; GFX7-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
-; GFX7-NEXT:    v_min_f32_e32 v18, v13, v29
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[28:29], v13, v29
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[8:9], v3, v19
-; GFX7-NEXT:    v_min_f32_e32 v3, v3, v19
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[10:11], v4, v20
-; GFX7-NEXT:    v_min_f32_e32 v4, v4, v20
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[12:13], v5, v21
-; GFX7-NEXT:    v_min_f32_e32 v5, v5, v21
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[14:15], v6, v22
-; GFX7-NEXT:    v_min_f32_e32 v6, v6, v22
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[16:17], v7, v23
-; GFX7-NEXT:    v_min_f32_e32 v7, v7, v23
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[18:19], v8, v24
-; GFX7-NEXT:    v_min_f32_e32 v8, v8, v24
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[20:21], v9, v25
-; GFX7-NEXT:    v_min_f32_e32 v9, v9, v25
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[22:23], v10, v26
-; GFX7-NEXT:    v_min_f32_e32 v10, v10, v26
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[24:25], v11, v27
-; GFX7-NEXT:    v_min_f32_e32 v11, v11, v27
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[26:27], v12, v28
-; GFX7-NEXT:    v_min_f32_e32 v12, v12, v28
-; GFX7-NEXT:    v_min_f32_e32 v19, v14, v30
-; GFX7-NEXT:    v_cmp_o_f32_e64 s[40:41], v14, v30
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v13, v17, v18, s[28:29]
-; GFX7-NEXT:    v_cndmask_b32_e64 v1, v17, v1, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v17, v2, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v17, v3, s[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v17, v4, s[10:11]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v17, v5, s[12:13]
-; GFX7-NEXT:    v_cndmask_b32_e64 v6, v17, v6, s[14:15]
-; GFX7-NEXT:    v_cndmask_b32_e64 v7, v17, v7, s[16:17]
-; GFX7-NEXT:    v_cndmask_b32_e64 v8, v17, v8, s[18:19]
-; GFX7-NEXT:    v_cndmask_b32_e64 v9, v17, v9, s[20:21]
-; GFX7-NEXT:    v_cndmask_b32_e64 v10, v17, v10, s[22:23]
-; GFX7-NEXT:    v_cndmask_b32_e64 v11, v17, v11, s[24:25]
-; GFX7-NEXT:    v_cndmask_b32_e64 v12, v17, v12, s[26:27]
-; GFX7-NEXT:    v_cndmask_b32_e64 v14, v17, v19, s[40:41]
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v20, v20
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v4, v20, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v20, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v21, v21
+; GFX7-NEXT:    v_cndmask_b32_e32 v5, v5, v21, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_min_f32_e32 v4, v4, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v21, v5, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v22, v22
+; GFX7-NEXT:    v_cndmask_b32_e32 v6, v6, v22, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX7-NEXT:    v_min_f32_e32 v5, v5, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v22, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v23, v23
+; GFX7-NEXT:    v_cndmask_b32_e32 v7, v7, v23, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX7-NEXT:    v_min_f32_e32 v6, v6, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v23, v7, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v24, v24
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v8, v24, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
+; GFX7-NEXT:    v_min_f32_e32 v7, v7, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v24, v8, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v25, v25
+; GFX7-NEXT:    v_cndmask_b32_e32 v9, v9, v25, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
+; GFX7-NEXT:    v_min_f32_e32 v8, v8, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v25, v9, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; GFX7-NEXT:    v_cndmask_b32_e32 v10, v10, v26, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
+; GFX7-NEXT:    v_min_f32_e32 v9, v9, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v26, v10, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v27, v27
+; GFX7-NEXT:    v_cndmask_b32_e32 v11, v11, v27, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
+; GFX7-NEXT:    v_min_f32_e32 v10, v10, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v27, v11, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v28, v28
+; GFX7-NEXT:    v_cndmask_b32_e32 v12, v12, v28, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
+; GFX7-NEXT:    v_min_f32_e32 v11, v11, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v28, v12, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v29, v29
+; GFX7-NEXT:    v_cndmask_b32_e32 v13, v13, v29, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
+; GFX7-NEXT:    v_min_f32_e32 v12, v12, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v29, v13, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v30, v30
+; GFX7-NEXT:    v_cndmask_b32_e32 v14, v14, v30, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
+; GFX7-NEXT:    v_min_f32_e32 v13, v13, v17
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v30, v14, vcc
+; GFX7-NEXT:    v_min_f32_e32 v14, v14, v17
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_min_f32_e32 v18, v15, v16
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v15, v16
-; GFX7-NEXT:    v_cndmask_b32_e32 v15, v17, v18, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v16, v16
+; GFX7-NEXT:    v_cndmask_b32_e32 v15, v15, v16, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
+; GFX7-NEXT:    v_cndmask_b32_e32 v16, v16, v15, vcc
+; GFX7-NEXT:    v_min_f32_e32 v15, v15, v16
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_v16f32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v16
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v16, v16
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v16, v16, v0, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v17, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX8-NEXT:    v_min_f32_e32 v0, v0, v16
+; GFX8-NEXT:    v_cndmask_b32_e32 v16, v17, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v18, v18
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v18, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX8-NEXT:    v_min_f32_e32 v1, v1, v16
+; GFX8-NEXT:    v_cndmask_b32_e32 v16, v18, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v19, v19
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v19, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX8-NEXT:    v_min_f32_e32 v2, v2, v16
+; GFX8-NEXT:    v_cndmask_b32_e32 v16, v19, v3, vcc
+; GFX8-NEXT:    v_min_f32_e32 v3, v3, v16
 ; GFX8-NEXT:    buffer_load_dword v16, off, s[0:3], s32
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[4:5], v1, v17
-; GFX8-NEXT:    v_min_f32_e32 v1, v1, v17
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[6:7], v2, v18
-; GFX8-NEXT:    v_min_f32_e32 v2, v2, v18
-; GFX8-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
-; GFX8-NEXT:    v_min_f32_e32 v18, v13, v29
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[28:29], v13, v29
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[8:9], v3, v19
-; GFX8-NEXT:    v_min_f32_e32 v3, v3, v19
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[10:11], v4, v20
-; GFX8-NEXT:    v_min_f32_e32 v4, v4, v20
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[12:13], v5, v21
-; GFX8-NEXT:    v_min_f32_e32 v5, v5, v21
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[14:15], v6, v22
-; GFX8-NEXT:    v_min_f32_e32 v6, v6, v22
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[16:17], v7, v23
-; GFX8-NEXT:    v_min_f32_e32 v7, v7, v23
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[18:19], v8, v24
-; GFX8-NEXT:    v_min_f32_e32 v8, v8, v24
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[20:21], v9, v25
-; GFX8-NEXT:    v_min_f32_e32 v9, v9, v25
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[22:23], v10, v26
-; GFX8-NEXT:    v_min_f32_e32 v10, v10, v26
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[24:25], v11, v27
-; GFX8-NEXT:    v_min_f32_e32 v11, v11, v27
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[26:27], v12, v28
-; GFX8-NEXT:    v_min_f32_e32 v12, v12, v28
-; GFX8-NEXT:    v_min_f32_e32 v19, v14, v30
-; GFX8-NEXT:    v_cmp_o_f32_e64 s[40:41], v14, v30
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v13, v17, v18, s[28:29]
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, v17, v1, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v17, v2, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v17, v3, s[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v17, v4, s[10:11]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v17, v5, s[12:13]
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, v17, v6, s[14:15]
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, v17, v7, s[16:17]
-; GFX8-NEXT:    v_cndmask_b32_e64 v8, v17, v8, s[18:19]
-; GFX8-NEXT:    v_cndmask_b32_e64 v9, v17, v9, s[20:21]
-; GFX8-NEXT:    v_cndmask_b32_e64 v10, v17, v10, s[22:23]
-; GFX8-NEXT:    v_cndmask_b32_e64 v11, v17, v11, s[24:25]
-; GFX8-NEXT:    v_cndmask_b32_e64 v12, v17, v12, s[26:27]
-; GFX8-NEXT:    v_cndmask_b32_e64 v14, v17, v19, s[40:41]
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v20, v20
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v20, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v20, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v21, v21
+; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v21, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX8-NEXT:    v_min_f32_e32 v4, v4, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v21, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v22, v22
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v22, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX8-NEXT:    v_min_f32_e32 v5, v5, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v22, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v23, v23
+; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v23, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX8-NEXT:    v_min_f32_e32 v6, v6, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v23, v7, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v24, v24
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v8, v24, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
+; GFX8-NEXT:    v_min_f32_e32 v7, v7, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v24, v8, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v25, v25
+; GFX8-NEXT:    v_cndmask_b32_e32 v9, v9, v25, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
+; GFX8-NEXT:    v_min_f32_e32 v8, v8, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v25, v9, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; GFX8-NEXT:    v_cndmask_b32_e32 v10, v10, v26, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
+; GFX8-NEXT:    v_min_f32_e32 v9, v9, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v26, v10, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v27, v27
+; GFX8-NEXT:    v_cndmask_b32_e32 v11, v11, v27, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
+; GFX8-NEXT:    v_min_f32_e32 v10, v10, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v27, v11, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v28, v28
+; GFX8-NEXT:    v_cndmask_b32_e32 v12, v12, v28, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
+; GFX8-NEXT:    v_min_f32_e32 v11, v11, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v28, v12, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v29, v29
+; GFX8-NEXT:    v_cndmask_b32_e32 v13, v13, v29, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
+; GFX8-NEXT:    v_min_f32_e32 v12, v12, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v29, v13, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v30, v30
+; GFX8-NEXT:    v_cndmask_b32_e32 v14, v14, v30, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
+; GFX8-NEXT:    v_min_f32_e32 v13, v13, v17
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v30, v14, vcc
+; GFX8-NEXT:    v_min_f32_e32 v14, v14, v17
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_min_f32_e32 v18, v15, v16
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v15, v16
-; GFX8-NEXT:    v_cndmask_b32_e32 v15, v17, v18, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v16, v16
+; GFX8-NEXT:    v_cndmask_b32_e32 v15, v15, v16, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
+; GFX8-NEXT:    v_cndmask_b32_e32 v16, v16, v15, vcc
+; GFX8-NEXT:    v_min_f32_e32 v15, v15, v16
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v16f32:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v16
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v16, v16
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v16, v16, v0, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v17, v17
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
 ; GFX900-NEXT:    v_min_f32_e32 v0, v0, v16
+; GFX900-NEXT:    v_cndmask_b32_e32 v16, v17, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v18, v18
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v18, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX900-NEXT:    v_min_f32_e32 v1, v1, v16
+; GFX900-NEXT:    v_cndmask_b32_e32 v16, v18, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v19, v19
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v19, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX900-NEXT:    v_min_f32_e32 v2, v2, v16
+; GFX900-NEXT:    v_cndmask_b32_e32 v16, v19, v3, vcc
+; GFX900-NEXT:    v_min_f32_e32 v3, v3, v16
 ; GFX900-NEXT:    buffer_load_dword v16, off, s[0:3], s32
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[4:5], v1, v17
-; GFX900-NEXT:    v_min_f32_e32 v1, v1, v17
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[6:7], v2, v18
-; GFX900-NEXT:    v_min_f32_e32 v2, v2, v18
-; GFX900-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
-; GFX900-NEXT:    v_min_f32_e32 v18, v13, v29
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[28:29], v13, v29
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[8:9], v3, v19
-; GFX900-NEXT:    v_min_f32_e32 v3, v3, v19
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[10:11], v4, v20
-; GFX900-NEXT:    v_min_f32_e32 v4, v4, v20
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[12:13], v5, v21
-; GFX900-NEXT:    v_min_f32_e32 v5, v5, v21
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[14:15], v6, v22
-; GFX900-NEXT:    v_min_f32_e32 v6, v6, v22
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[16:17], v7, v23
-; GFX900-NEXT:    v_min_f32_e32 v7, v7, v23
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[18:19], v8, v24
-; GFX900-NEXT:    v_min_f32_e32 v8, v8, v24
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[20:21], v9, v25
-; GFX900-NEXT:    v_min_f32_e32 v9, v9, v25
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[22:23], v10, v26
-; GFX900-NEXT:    v_min_f32_e32 v10, v10, v26
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[24:25], v11, v27
-; GFX900-NEXT:    v_min_f32_e32 v11, v11, v27
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[26:27], v12, v28
-; GFX900-NEXT:    v_min_f32_e32 v12, v12, v28
-; GFX900-NEXT:    v_min_f32_e32 v19, v14, v30
-; GFX900-NEXT:    v_cmp_o_f32_e64 s[40:41], v14, v30
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v17, v0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v13, v17, v18, s[28:29]
-; GFX900-NEXT:    v_cndmask_b32_e64 v1, v17, v1, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v17, v2, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v17, v3, s[8:9]
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v17, v4, s[10:11]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v17, v5, s[12:13]
-; GFX900-NEXT:    v_cndmask_b32_e64 v6, v17, v6, s[14:15]
-; GFX900-NEXT:    v_cndmask_b32_e64 v7, v17, v7, s[16:17]
-; GFX900-NEXT:    v_cndmask_b32_e64 v8, v17, v8, s[18:19]
-; GFX900-NEXT:    v_cndmask_b32_e64 v9, v17, v9, s[20:21]
-; GFX900-NEXT:    v_cndmask_b32_e64 v10, v17, v10, s[22:23]
-; GFX900-NEXT:    v_cndmask_b32_e64 v11, v17, v11, s[24:25]
-; GFX900-NEXT:    v_cndmask_b32_e64 v12, v17, v12, s[26:27]
-; GFX900-NEXT:    v_cndmask_b32_e64 v14, v17, v19, s[40:41]
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v20, v20
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v20, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v20, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v21, v21
+; GFX900-NEXT:    v_cndmask_b32_e32 v5, v5, v21, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX900-NEXT:    v_min_f32_e32 v4, v4, v17
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v21, v5, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v22, v22
+; GFX900-NEXT:    v_cndmask_b32_e32 v6, v6, v22, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX900-NEXT:    v_min_f32_e32 v5, v5, v17
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v22, v6, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v23, v23
+; GFX900-NEXT:    v_cndmask_b32_e32 v7, v7, v23, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX900-NEXT:    v_min_f32_e32 v6, v6, v17
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v23, v7, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v24, v24
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v24, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
+; GFX900-NEXT:    v_min_f32_e32 v7, v7, v17
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v24, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v25, v25
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v25, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
+; GFX900-NEXT:    v_min_f32_e32 v8, v8, v17
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v25, v9, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; GFX900-NEXT:    v_cndmask_b32_e32 v10, v10, v26, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
+; GFX900-NEXT:    v_min_f32_e32 v9, v9, v17
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v26, v10, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v27, v27
+; GFX900-NEXT:    v_cndmask_b32_e32 v11, v11, v27, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
+; GFX900-NEXT:    v_min_f32_e32 v10, v10, v17
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v27, v11, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v28, v28
+; GFX900-NEXT:    v_cndmask_b32_e32 v12, v12, v28, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
+; GFX900-NEXT:    v_min_f32_e32 v11, v11, v17
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v28, v12, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v29, v29
+; GFX900-NEXT:    v_cndmask_b32_e32 v13, v13, v29, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
+; GFX900-NEXT:    v_min_f32_e32 v12, v12, v17
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v29, v13, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v30, v30
+; GFX900-NEXT:    v_cndmask_b32_e32 v14, v14, v30, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
+; GFX900-NEXT:    v_min_f32_e32 v13, v13, v17
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v30, v14, vcc
+; GFX900-NEXT:    v_min_f32_e32 v14, v14, v17
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_min_f32_e32 v18, v15, v16
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v15, v16
-; GFX900-NEXT:    v_cndmask_b32_e32 v15, v17, v18, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v16, v16
+; GFX900-NEXT:    v_cndmask_b32_e32 v15, v15, v16, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
+; GFX900-NEXT:    v_cndmask_b32_e32 v16, v16, v15, vcc
+; GFX900-NEXT:    v_min_f32_e32 v15, v15, v16
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v16f32:
@@ -1859,105 +2258,169 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX10-NEXT:    v_min_f32_e32 v32, v0, v16
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v16
-; GFX10-NEXT:    v_min_f32_e32 v33, v1, v17
-; GFX10-NEXT:    v_min_f32_e32 v34, v2, v18
-; GFX10-NEXT:    v_min_f32_e32 v35, v3, v19
-; GFX10-NEXT:    v_min_f32_e32 v36, v4, v20
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v32, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v17
-; GFX10-NEXT:    v_min_f32_e32 v37, v5, v21
-; GFX10-NEXT:    v_min_f32_e32 v38, v6, v22
-; GFX10-NEXT:    v_min_f32_e32 v39, v7, v23
-; GFX10-NEXT:    v_min_f32_e32 v48, v8, v24
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v33, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v18
-; GFX10-NEXT:    v_min_f32_e32 v49, v9, v25
-; GFX10-NEXT:    v_min_f32_e32 v50, v10, v26
-; GFX10-NEXT:    v_min_f32_e32 v51, v11, v27
-; GFX10-NEXT:    v_min_f32_e32 v52, v12, v28
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v34, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v3, v19
-; GFX10-NEXT:    v_min_f32_e32 v53, v13, v29
-; GFX10-NEXT:    v_min_f32_e32 v54, v14, v30
-; GFX10-NEXT:    v_cndmask_b32_e32 v3, 0x7fc00000, v35, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v4, v20
-; GFX10-NEXT:    v_cndmask_b32_e32 v4, 0x7fc00000, v36, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v5, v21
-; GFX10-NEXT:    v_cndmask_b32_e32 v5, 0x7fc00000, v37, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v6, v22
-; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0x7fc00000, v38, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v7, v23
-; GFX10-NEXT:    v_cndmask_b32_e32 v7, 0x7fc00000, v39, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v8, v24
-; GFX10-NEXT:    v_cndmask_b32_e32 v8, 0x7fc00000, v48, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v9, v25
-; GFX10-NEXT:    v_cndmask_b32_e32 v9, 0x7fc00000, v49, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v10, v26
-; GFX10-NEXT:    v_cndmask_b32_e32 v10, 0x7fc00000, v50, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v11, v27
-; GFX10-NEXT:    v_cndmask_b32_e32 v11, 0x7fc00000, v51, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v12, v28
-; GFX10-NEXT:    v_cndmask_b32_e32 v12, 0x7fc00000, v52, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v13, v29
-; GFX10-NEXT:    v_cndmask_b32_e32 v13, 0x7fc00000, v53, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v14, v30
-; GFX10-NEXT:    v_cndmask_b32_e32 v14, 0x7fc00000, v54, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v16, v16
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v17, v17
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v18, v18
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v18, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v19, v19
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v19, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v20, v20
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v20, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v21, v21
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v21, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v22, v22
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v22, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v23, v23
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v23, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v24, v24
+; GFX10-NEXT:    v_cndmask_b32_e32 v8, v8, v24, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v25, v25
+; GFX10-NEXT:    v_cndmask_b32_e32 v9, v9, v25, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v26, v26
+; GFX10-NEXT:    v_cndmask_b32_e32 v10, v10, v26, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v27, v27
+; GFX10-NEXT:    v_cndmask_b32_e32 v11, v11, v27, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v28, v28
+; GFX10-NEXT:    v_cndmask_b32_e32 v12, v12, v28, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v29, v29
+; GFX10-NEXT:    v_cndmask_b32_e32 v13, v13, v29, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v30, v30
+; GFX10-NEXT:    v_cndmask_b32_e32 v14, v14, v30, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v16, v16, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v16
+; GFX10-NEXT:    v_cndmask_b32_e32 v17, v17, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_min_f32_e32 v1, v1, v17
+; GFX10-NEXT:    v_cndmask_b32_e32 v18, v18, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_min_f32_e32 v2, v2, v18
+; GFX10-NEXT:    v_cndmask_b32_e32 v19, v19, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_min_f32_e32 v3, v3, v19
+; GFX10-NEXT:    v_cndmask_b32_e32 v20, v20, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_min_f32_e32 v4, v4, v20
+; GFX10-NEXT:    v_cndmask_b32_e32 v21, v21, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_min_f32_e32 v5, v5, v21
+; GFX10-NEXT:    v_cndmask_b32_e32 v22, v22, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_min_f32_e32 v6, v6, v22
+; GFX10-NEXT:    v_cndmask_b32_e32 v23, v23, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v8, v8
+; GFX10-NEXT:    v_min_f32_e32 v7, v7, v23
+; GFX10-NEXT:    v_cndmask_b32_e32 v24, v24, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v9, v9
+; GFX10-NEXT:    v_min_f32_e32 v8, v8, v24
+; GFX10-NEXT:    v_cndmask_b32_e32 v25, v25, v9, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v10, v10
+; GFX10-NEXT:    v_min_f32_e32 v9, v9, v25
+; GFX10-NEXT:    v_cndmask_b32_e32 v26, v26, v10, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v11, v11
+; GFX10-NEXT:    v_min_f32_e32 v10, v10, v26
+; GFX10-NEXT:    v_cndmask_b32_e32 v27, v27, v11, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v12, v12
+; GFX10-NEXT:    v_min_f32_e32 v11, v11, v27
+; GFX10-NEXT:    v_cndmask_b32_e32 v28, v28, v12, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v13, v13
+; GFX10-NEXT:    v_min_f32_e32 v12, v12, v28
+; GFX10-NEXT:    v_cndmask_b32_e32 v29, v29, v13, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v14, v14
+; GFX10-NEXT:    v_min_f32_e32 v13, v13, v29
+; GFX10-NEXT:    v_cndmask_b32_e32 v30, v30, v14, vcc_lo
+; GFX10-NEXT:    v_min_f32_e32 v14, v14, v30
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_min_f32_e32 v16, v15, v31
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v15, v31
-; GFX10-NEXT:    v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v31, v31
+; GFX10-NEXT:    v_cndmask_b32_e32 v15, v15, v31, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v15, v15
+; GFX10-NEXT:    v_cndmask_b32_e32 v16, v31, v15, vcc_lo
+; GFX10-NEXT:    v_min_f32_e32 v15, v15, v16
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_v16f32:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    scratch_load_b32 v31, off, s32
-; GFX11-NEXT:    v_dual_min_f32 v32, v0, v16 :: v_dual_min_f32 v33, v1, v17
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v16
-; GFX11-NEXT:    v_dual_min_f32 v34, v2, v18 :: v_dual_min_f32 v35, v3, v19
-; GFX11-NEXT:    v_dual_min_f32 v36, v4, v20 :: v_dual_min_f32 v37, v5, v21
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v32, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v17
-; GFX11-NEXT:    v_min_f32_e32 v54, v14, v30
-; GFX11-NEXT:    v_dual_min_f32 v38, v6, v22 :: v_dual_min_f32 v39, v7, v23
-; GFX11-NEXT:    v_dual_min_f32 v48, v8, v24 :: v_dual_min_f32 v49, v9, v25
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v33, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v18
-; GFX11-NEXT:    v_dual_min_f32 v50, v10, v26 :: v_dual_min_f32 v51, v11, v27
-; GFX11-NEXT:    v_dual_min_f32 v52, v12, v28 :: v_dual_min_f32 v53, v13, v29
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v34, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v3, v19
-; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7fc00000, v35, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v4, v20
-; GFX11-NEXT:    v_cndmask_b32_e32 v4, 0x7fc00000, v36, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v5, v21
-; GFX11-NEXT:    v_cndmask_b32_e32 v5, 0x7fc00000, v37, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v6, v22
-; GFX11-NEXT:    v_cndmask_b32_e32 v6, 0x7fc00000, v38, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v7, v23
-; GFX11-NEXT:    v_cndmask_b32_e32 v7, 0x7fc00000, v39, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v8, v24
-; GFX11-NEXT:    v_cndmask_b32_e32 v8, 0x7fc00000, v48, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v9, v25
-; GFX11-NEXT:    v_cndmask_b32_e32 v9, 0x7fc00000, v49, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v10, v26
-; GFX11-NEXT:    v_cndmask_b32_e32 v10, 0x7fc00000, v50, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v11, v27
-; GFX11-NEXT:    v_cndmask_b32_e32 v11, 0x7fc00000, v51, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v12, v28
-; GFX11-NEXT:    v_cndmask_b32_e32 v12, 0x7fc00000, v52, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v13, v29
-; GFX11-NEXT:    v_cndmask_b32_e32 v13, 0x7fc00000, v53, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v14, v30
-; GFX11-NEXT:    v_cndmask_b32_e32 v14, 0x7fc00000, v54, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v16, v16
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v17, v17
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v18, v18
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v18, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v19, v19
+; GFX11-NEXT:    v_cndmask_b32_e32 v3, v3, v19, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v20, v20
+; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v20, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v21, v21
+; GFX11-NEXT:    v_cndmask_b32_e32 v5, v5, v21, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v22, v22
+; GFX11-NEXT:    v_cndmask_b32_e32 v6, v6, v22, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v23, v23
+; GFX11-NEXT:    v_cndmask_b32_e32 v7, v7, v23, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v24, v24
+; GFX11-NEXT:    v_cndmask_b32_e32 v8, v8, v24, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v25, v25
+; GFX11-NEXT:    v_cndmask_b32_e32 v9, v9, v25, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v26, v26
+; GFX11-NEXT:    v_cndmask_b32_e32 v10, v10, v26, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v27, v27
+; GFX11-NEXT:    v_cndmask_b32_e32 v11, v11, v27, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v28, v28
+; GFX11-NEXT:    v_cndmask_b32_e32 v12, v12, v28, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v29, v29
+; GFX11-NEXT:    v_cndmask_b32_e32 v13, v13, v29, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v30, v30
+; GFX11-NEXT:    v_cndmask_b32_e32 v14, v14, v30, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v16, v16, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v17, v17, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_dual_min_f32 v1, v1, v17 :: v_dual_cndmask_b32 v18, v18, v2
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX11-NEXT:    v_cndmask_b32_e32 v19, v19, v3, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX11-NEXT:    v_dual_min_f32 v3, v3, v19 :: v_dual_cndmask_b32 v20, v20, v4
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX11-NEXT:    v_cndmask_b32_e32 v21, v21, v5, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v6, v6
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_dual_min_f32 v0, v0, v16 :: v_dual_min_f32 v5, v5, v21
+; GFX11-NEXT:    v_cndmask_b32_e32 v22, v22, v6, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v7, v7
+; GFX11-NEXT:    v_cndmask_b32_e32 v23, v23, v7, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v8, v8
+; GFX11-NEXT:    v_dual_min_f32 v4, v4, v20 :: v_dual_min_f32 v7, v7, v23
+; GFX11-NEXT:    v_cndmask_b32_e32 v24, v24, v8, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v9, v9
+; GFX11-NEXT:    v_cndmask_b32_e32 v25, v25, v9, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v10, v10
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_dual_min_f32 v6, v6, v22 :: v_dual_min_f32 v9, v9, v25
+; GFX11-NEXT:    v_cndmask_b32_e32 v26, v26, v10, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v11, v11
+; GFX11-NEXT:    v_cndmask_b32_e32 v27, v27, v11, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v12, v12
+; GFX11-NEXT:    v_dual_min_f32 v8, v8, v24 :: v_dual_min_f32 v11, v11, v27
+; GFX11-NEXT:    v_cndmask_b32_e32 v28, v28, v12, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v13, v13
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_dual_min_f32 v12, v12, v28 :: v_dual_cndmask_b32 v29, v29, v13
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v14, v14
+; GFX11-NEXT:    v_dual_min_f32 v10, v10, v26 :: v_dual_min_f32 v13, v13, v29
+; GFX11-NEXT:    v_cndmask_b32_e32 v30, v30, v14, vcc_lo
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_min_f32_e32 v16, v15, v31
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v15, v31
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v31, v31
+; GFX11-NEXT:    v_dual_min_f32 v2, v2, v18 :: v_dual_cndmask_b32 v15, v15, v31
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v15, v15
+; GFX11-NEXT:    v_cndmask_b32_e32 v16, v31, v15, vcc_lo
+; GFX11-NEXT:    v_dual_min_f32 v14, v14, v30 :: v_dual_min_f32 v15, v15, v16
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v16f32:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
index d07bd6c8dd902..4f6b189cae7c9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
@@ -12,61 +12,74 @@ define double @v_minimum_f64(double %src0, double %src1) {
 ; GFX7-LABEL: v_minimum_f64:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX7-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_f64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_f64:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX900-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX900-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_f64:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX950-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX950-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_f64:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_f64:
@@ -130,61 +143,74 @@ define double @v_minimum_f64__nsz(double %src0, double %src1) {
 ; GFX7-LABEL: v_minimum_f64__nsz:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX7-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_f64__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_f64__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX900-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX900-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_f64__nsz:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX950-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX950-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_f64__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_f64__nsz:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_f64__nsz:
@@ -249,67 +275,80 @@ define double @v_minimum_f64__nnan_src0(double %arg0, double %src1) {
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
-; GFX7-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX7-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_f64__nnan_src0:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
-; GFX8-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_f64__nnan_src0:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
-; GFX900-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX900-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX900-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_f64__nnan_src0:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
-; GFX950-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX950-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX950-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_f64__nnan_src0:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
-; GFX10-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_f64__nnan_src0:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_f64__nnan_src0:
@@ -333,67 +372,61 @@ define double @v_minimum_f64__nnan_src1(double %src0, double %arg1) {
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
-; GFX7-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX7-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_f64__nnan_src1:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
-; GFX8-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_f64__nnan_src1:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
-; GFX900-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX900-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX900-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_f64__nnan_src1:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
-; GFX950-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX950-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX950-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_f64__nnan_src1:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
-; GFX10-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_f64__nnan_src1:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_f64__nnan_src1:
@@ -416,13 +449,17 @@ define void @s_minimum_f64(double inreg %src0, double inreg %src1) {
 ; GFX7-LABEL: s_minimum_f64:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_mov_b32_e32 v0, s18
-; GFX7-NEXT:    v_mov_b32_e32 v1, s19
-; GFX7-NEXT:    v_min_f64 v[2:3], s[16:17], v[0:1]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, s[16:17], v[0:1]
-; GFX7-NEXT:    v_mov_b32_e32 v4, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], s[18:19], s[18:19]
+; GFX7-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GFX7-NEXT:    s_cselect_b32 s5, s19, s17
+; GFX7-NEXT:    s_cselect_b32 s4, s18, s16
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], s[4:5], s[4:5]
+; GFX7-NEXT:    s_and_b64 s[6:7], s[6:7], exec
+; GFX7-NEXT:    s_cselect_b32 s6, s5, s19
+; GFX7-NEXT:    s_cselect_b32 s7, s4, s18
+; GFX7-NEXT:    v_mov_b32_e32 v0, s7
+; GFX7-NEXT:    v_mov_b32_e32 v1, s6
+; GFX7-NEXT:    v_min_f64 v[0:1], s[4:5], v[0:1]
 ; GFX7-NEXT:    ;;#ASMSTART
 ; GFX7-NEXT:    ; use v[0:1]
 ; GFX7-NEXT:    ;;#ASMEND
@@ -431,13 +468,17 @@ define void @s_minimum_f64(double inreg %src0, double inreg %src1) {
 ; GFX8-LABEL: s_minimum_f64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, s18
-; GFX8-NEXT:    v_mov_b32_e32 v1, s19
-; GFX8-NEXT:    v_min_f64 v[2:3], s[16:17], v[0:1]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, s[16:17], v[0:1]
-; GFX8-NEXT:    v_mov_b32_e32 v4, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], s[18:19], s[18:19]
+; GFX8-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GFX8-NEXT:    s_cselect_b32 s5, s19, s17
+; GFX8-NEXT:    s_cselect_b32 s4, s18, s16
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], s[4:5], s[4:5]
+; GFX8-NEXT:    s_and_b64 s[6:7], s[6:7], exec
+; GFX8-NEXT:    s_cselect_b32 s6, s5, s19
+; GFX8-NEXT:    s_cselect_b32 s7, s4, s18
+; GFX8-NEXT:    v_mov_b32_e32 v0, s7
+; GFX8-NEXT:    v_mov_b32_e32 v1, s6
+; GFX8-NEXT:    v_min_f64 v[0:1], s[4:5], v[0:1]
 ; GFX8-NEXT:    ;;#ASMSTART
 ; GFX8-NEXT:    ; use v[0:1]
 ; GFX8-NEXT:    ;;#ASMEND
@@ -446,13 +487,17 @@ define void @s_minimum_f64(double inreg %src0, double inreg %src1) {
 ; GFX900-LABEL: s_minimum_f64:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_mov_b32_e32 v0, s18
-; GFX900-NEXT:    v_mov_b32_e32 v1, s19
-; GFX900-NEXT:    v_min_f64 v[2:3], s[16:17], v[0:1]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, s[16:17], v[0:1]
-; GFX900-NEXT:    v_mov_b32_e32 v4, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], s[18:19], s[18:19]
+; GFX900-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GFX900-NEXT:    s_cselect_b32 s5, s19, s17
+; GFX900-NEXT:    s_cselect_b32 s4, s18, s16
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], s[4:5], s[4:5]
+; GFX900-NEXT:    s_and_b64 s[6:7], s[6:7], exec
+; GFX900-NEXT:    s_cselect_b32 s6, s5, s19
+; GFX900-NEXT:    s_cselect_b32 s7, s4, s18
+; GFX900-NEXT:    v_mov_b32_e32 v0, s7
+; GFX900-NEXT:    v_mov_b32_e32 v1, s6
+; GFX900-NEXT:    v_min_f64 v[0:1], s[4:5], v[0:1]
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; use v[0:1]
 ; GFX900-NEXT:    ;;#ASMEND
@@ -461,13 +506,17 @@ define void @s_minimum_f64(double inreg %src0, double inreg %src1) {
 ; GFX950-LABEL: s_minimum_f64:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[2:3]
-; GFX950-NEXT:    v_min_f64 v[2:3], s[0:1], v[0:1]
-; GFX950-NEXT:    v_mov_b32_e32 v4, 0x7ff80000
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
-; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e64 s[4:5], s[2:3], s[2:3]
+; GFX950-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GFX950-NEXT:    s_cselect_b32 s1, s3, s1
+; GFX950-NEXT:    s_cselect_b32 s0, s2, s0
+; GFX950-NEXT:    v_cmp_u_f64_e64 s[4:5], s[0:1], s[0:1]
+; GFX950-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GFX950-NEXT:    s_cselect_b32 s3, s1, s3
+; GFX950-NEXT:    s_cselect_b32 s2, s0, s2
+; GFX950-NEXT:    v_mov_b32_e32 v0, s2
+; GFX950-NEXT:    v_mov_b32_e32 v1, s3
+; GFX950-NEXT:    v_min_f64 v[0:1], s[0:1], v[0:1]
 ; GFX950-NEXT:    ;;#ASMSTART
 ; GFX950-NEXT:    ; use v[0:1]
 ; GFX950-NEXT:    ;;#ASMEND
@@ -476,10 +525,15 @@ define void @s_minimum_f64(double inreg %src0, double inreg %src1) {
 ; GFX10-LABEL: s_minimum_f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f64 v[0:1], s[16:17], s[18:19]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, s[16:17], s[18:19]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s4
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, s[18:19], s[18:19]
+; GFX10-NEXT:    s_and_b32 s4, s4, exec_lo
+; GFX10-NEXT:    s_cselect_b32 s5, s19, s17
+; GFX10-NEXT:    s_cselect_b32 s4, s18, s16
+; GFX10-NEXT:    v_cmp_u_f64_e64 s6, s[4:5], s[4:5]
+; GFX10-NEXT:    s_and_b32 s6, s6, exec_lo
+; GFX10-NEXT:    s_cselect_b32 s7, s5, s19
+; GFX10-NEXT:    s_cselect_b32 s6, s4, s18
+; GFX10-NEXT:    v_min_f64 v[0:1], s[4:5], s[6:7]
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ; use v[0:1]
 ; GFX10-NEXT:    ;;#ASMEND
@@ -488,11 +542,16 @@ define void @s_minimum_f64(double inreg %src0, double inreg %src1) {
 ; GFX11-LABEL: s_minimum_f64:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s4, s[2:3], s[2:3]
+; GFX11-NEXT:    s_and_b32 s4, s4, exec_lo
+; GFX11-NEXT:    s_cselect_b32 s1, s3, s1
+; GFX11-NEXT:    s_cselect_b32 s0, s2, s0
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s4, s[0:1], s[0:1]
+; GFX11-NEXT:    s_and_b32 s4, s4, exec_lo
+; GFX11-NEXT:    s_cselect_b32 s3, s1, s3
+; GFX11-NEXT:    s_cselect_b32 s2, s0, s2
 ; GFX11-NEXT:    v_min_f64 v[0:1], s[0:1], s[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, s[0:1], s[2:3]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s0
 ; GFX11-NEXT:    ;;#ASMSTART
 ; GFX11-NEXT:    ; use v[0:1]
 ; GFX11-NEXT:    ;;#ASMEND
@@ -519,86 +578,120 @@ define <2 x double> @v_minimum_v2f64(<2 x double> %src0, <2 x double> %src1) {
 ; GFX7-LABEL: v_minimum_v2f64:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT:    v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX7-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[6:7], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s[4:5]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v6, v6, v2, s[4:5]
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_v2f64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT:    v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[6:7], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s[4:5]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v6, v6, v2, s[4:5]
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX8-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v2f64:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX900-NEXT:    v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[6:7], v[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s[4:5]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX900-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v6, v6, v2, s[4:5]
+; GFX900-NEXT:    v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX900-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v2f64:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX950-NEXT:    v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX950-NEXT:    v_min_f64 v[0:1], v[0:1], v[4:5]
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX950-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v9, v8, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v5, v8, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v7, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX950-NEXT:    v_min_f64 v[2:3], v[2:3], v[4:5]
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v2f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT:    v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[6:7]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v5, 0x7ff80000, s4
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[6:7], v[6:7]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s4
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v2, s4
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX10-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_v2f64:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT:    v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[6:7]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[6:7], v[6:7]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v5 :: v_dual_cndmask_b32 v0, v0, v4
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v5, v5, v1 :: v_dual_cndmask_b32 v4, v4, v0
+; GFX11-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v6, v6, v2, s0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v2f64:
@@ -669,86 +762,120 @@ define <2 x double> @v_minimum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1
 ; GFX7-LABEL: v_minimum_v2f64__nsz:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT:    v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX7-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[6:7], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s[4:5]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v6, v6, v2, s[4:5]
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_v2f64__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT:    v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[6:7], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s[4:5]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v6, v6, v2, s[4:5]
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX8-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v2f64__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX900-NEXT:    v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[6:7], v[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s[4:5]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX900-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v6, v6, v2, s[4:5]
+; GFX900-NEXT:    v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX900-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v2f64__nsz:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX950-NEXT:    v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX950-NEXT:    v_min_f64 v[0:1], v[0:1], v[4:5]
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX950-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v9, v8, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v5, v8, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v7, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX950-NEXT:    v_min_f64 v[2:3], v[2:3], v[4:5]
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v2f64__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT:    v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[6:7]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v5, 0x7ff80000, s4
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[6:7], v[6:7]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s4
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v2, s4
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX10-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_v2f64__nsz:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT:    v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[6:7]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[6:7], v[6:7]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v5 :: v_dual_cndmask_b32 v0, v0, v4
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v5, v5, v1 :: v_dual_cndmask_b32 v4, v4, v0
+; GFX11-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v6, v6, v2, s0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v2f64__nsz:
@@ -819,19 +946,28 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
 ; GFX7-LABEL: s_minimum_v2f64:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_mov_b32_e32 v0, s22
-; GFX7-NEXT:    v_mov_b32_e32 v1, s23
-; GFX7-NEXT:    v_min_f64 v[2:3], s[18:19], v[0:1]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, s[18:19], v[0:1]
-; GFX7-NEXT:    v_mov_b32_e32 v0, s20
-; GFX7-NEXT:    v_mov_b32_e32 v1, s21
-; GFX7-NEXT:    v_min_f64 v[4:5], s[16:17], v[0:1]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], s[16:17], v[0:1]
-; GFX7-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v1, v5, v6, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, s[4:5]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], s[22:23], s[22:23]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[8:9], s[20:21], s[20:21]
+; GFX7-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GFX7-NEXT:    s_cselect_b32 s5, s23, s19
+; GFX7-NEXT:    s_cselect_b32 s4, s22, s18
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], s[4:5], s[4:5]
+; GFX7-NEXT:    s_and_b64 s[6:7], s[6:7], exec
+; GFX7-NEXT:    s_cselect_b32 s10, s5, s23
+; GFX7-NEXT:    s_cselect_b32 s11, s4, s22
+; GFX7-NEXT:    s_and_b64 s[6:7], s[8:9], exec
+; GFX7-NEXT:    s_cselect_b32 s7, s21, s17
+; GFX7-NEXT:    s_cselect_b32 s6, s20, s16
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[8:9], s[6:7], s[6:7]
+; GFX7-NEXT:    v_mov_b32_e32 v0, s11
+; GFX7-NEXT:    v_mov_b32_e32 v1, s10
+; GFX7-NEXT:    v_min_f64 v[2:3], s[4:5], v[0:1]
+; GFX7-NEXT:    s_and_b64 s[4:5], s[8:9], exec
+; GFX7-NEXT:    s_cselect_b32 s4, s7, s21
+; GFX7-NEXT:    s_cselect_b32 s5, s6, s20
+; GFX7-NEXT:    v_mov_b32_e32 v0, s5
+; GFX7-NEXT:    v_mov_b32_e32 v1, s4
+; GFX7-NEXT:    v_min_f64 v[0:1], s[6:7], v[0:1]
 ; GFX7-NEXT:    ;;#ASMSTART
 ; GFX7-NEXT:    ; use v[0:3]
 ; GFX7-NEXT:    ;;#ASMEND
@@ -840,19 +976,28 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
 ; GFX8-LABEL: s_minimum_v2f64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, s22
-; GFX8-NEXT:    v_mov_b32_e32 v1, s23
-; GFX8-NEXT:    v_min_f64 v[2:3], s[18:19], v[0:1]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, s[18:19], v[0:1]
-; GFX8-NEXT:    v_mov_b32_e32 v0, s20
-; GFX8-NEXT:    v_mov_b32_e32 v1, s21
-; GFX8-NEXT:    v_min_f64 v[4:5], s[16:17], v[0:1]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], s[16:17], v[0:1]
-; GFX8-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, v5, v6, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, s[4:5]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], s[22:23], s[22:23]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[8:9], s[20:21], s[20:21]
+; GFX8-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GFX8-NEXT:    s_cselect_b32 s5, s23, s19
+; GFX8-NEXT:    s_cselect_b32 s4, s22, s18
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], s[4:5], s[4:5]
+; GFX8-NEXT:    s_and_b64 s[6:7], s[6:7], exec
+; GFX8-NEXT:    s_cselect_b32 s10, s5, s23
+; GFX8-NEXT:    s_cselect_b32 s11, s4, s22
+; GFX8-NEXT:    s_and_b64 s[6:7], s[8:9], exec
+; GFX8-NEXT:    s_cselect_b32 s7, s21, s17
+; GFX8-NEXT:    s_cselect_b32 s6, s20, s16
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[8:9], s[6:7], s[6:7]
+; GFX8-NEXT:    v_mov_b32_e32 v0, s11
+; GFX8-NEXT:    v_mov_b32_e32 v1, s10
+; GFX8-NEXT:    v_min_f64 v[2:3], s[4:5], v[0:1]
+; GFX8-NEXT:    s_and_b64 s[4:5], s[8:9], exec
+; GFX8-NEXT:    s_cselect_b32 s4, s7, s21
+; GFX8-NEXT:    s_cselect_b32 s5, s6, s20
+; GFX8-NEXT:    v_mov_b32_e32 v0, s5
+; GFX8-NEXT:    v_mov_b32_e32 v1, s4
+; GFX8-NEXT:    v_min_f64 v[0:1], s[6:7], v[0:1]
 ; GFX8-NEXT:    ;;#ASMSTART
 ; GFX8-NEXT:    ; use v[0:3]
 ; GFX8-NEXT:    ;;#ASMEND
@@ -861,19 +1006,28 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
 ; GFX900-LABEL: s_minimum_v2f64:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_mov_b32_e32 v0, s22
-; GFX900-NEXT:    v_mov_b32_e32 v1, s23
-; GFX900-NEXT:    v_min_f64 v[2:3], s[18:19], v[0:1]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, s[18:19], v[0:1]
-; GFX900-NEXT:    v_mov_b32_e32 v0, s20
-; GFX900-NEXT:    v_mov_b32_e32 v1, s21
-; GFX900-NEXT:    v_min_f64 v[4:5], s[16:17], v[0:1]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], s[16:17], v[0:1]
-; GFX900-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v1, v5, v6, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v4, 0, s[4:5]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], s[22:23], s[22:23]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[8:9], s[20:21], s[20:21]
+; GFX900-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GFX900-NEXT:    s_cselect_b32 s5, s23, s19
+; GFX900-NEXT:    s_cselect_b32 s4, s22, s18
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], s[4:5], s[4:5]
+; GFX900-NEXT:    s_and_b64 s[6:7], s[6:7], exec
+; GFX900-NEXT:    s_cselect_b32 s10, s5, s23
+; GFX900-NEXT:    s_cselect_b32 s11, s4, s22
+; GFX900-NEXT:    s_and_b64 s[6:7], s[8:9], exec
+; GFX900-NEXT:    s_cselect_b32 s7, s21, s17
+; GFX900-NEXT:    s_cselect_b32 s6, s20, s16
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[8:9], s[6:7], s[6:7]
+; GFX900-NEXT:    v_mov_b32_e32 v0, s11
+; GFX900-NEXT:    v_mov_b32_e32 v1, s10
+; GFX900-NEXT:    v_min_f64 v[2:3], s[4:5], v[0:1]
+; GFX900-NEXT:    s_and_b64 s[4:5], s[8:9], exec
+; GFX900-NEXT:    s_cselect_b32 s4, s7, s21
+; GFX900-NEXT:    s_cselect_b32 s5, s6, s20
+; GFX900-NEXT:    v_mov_b32_e32 v0, s5
+; GFX900-NEXT:    v_mov_b32_e32 v1, s4
+; GFX900-NEXT:    v_min_f64 v[0:1], s[6:7], v[0:1]
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; use v[0:3]
 ; GFX900-NEXT:    ;;#ASMEND
@@ -882,18 +1036,28 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
 ; GFX950-LABEL: s_minimum_v2f64:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[18:19]
+; GFX950-NEXT:    v_cmp_u_f64_e64 s[4:5], s[18:19], s[18:19]
+; GFX950-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GFX950-NEXT:    s_cselect_b32 s3, s19, s3
+; GFX950-NEXT:    s_cselect_b32 s2, s18, s2
+; GFX950-NEXT:    v_cmp_u_f64_e64 s[4:5], s[2:3], s[2:3]
+; GFX950-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GFX950-NEXT:    s_cselect_b32 s4, s3, s19
+; GFX950-NEXT:    s_cselect_b32 s5, s2, s18
+; GFX950-NEXT:    v_mov_b32_e32 v0, s5
+; GFX950-NEXT:    v_mov_b32_e32 v1, s4
 ; GFX950-NEXT:    v_min_f64 v[2:3], s[2:3], v[0:1]
-; GFX950-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, s[2:3], v[0:1]
-; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[16:17]
-; GFX950-NEXT:    v_min_f64 v[4:5], s[0:1], v[0:1]
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
-; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e64 s[2:3], s[16:17], s[16:17]
+; GFX950-NEXT:    s_and_b64 s[2:3], s[2:3], exec
+; GFX950-NEXT:    s_cselect_b32 s1, s17, s1
+; GFX950-NEXT:    s_cselect_b32 s0, s16, s0
+; GFX950-NEXT:    v_cmp_u_f64_e64 s[2:3], s[0:1], s[0:1]
+; GFX950-NEXT:    s_and_b64 s[2:3], s[2:3], exec
+; GFX950-NEXT:    s_cselect_b32 s2, s1, s17
+; GFX950-NEXT:    s_cselect_b32 s3, s0, s16
+; GFX950-NEXT:    v_mov_b32_e32 v0, s3
+; GFX950-NEXT:    v_mov_b32_e32 v1, s2
+; GFX950-NEXT:    v_min_f64 v[0:1], s[0:1], v[0:1]
 ; GFX950-NEXT:    ;;#ASMSTART
 ; GFX950-NEXT:    ; use v[0:3]
 ; GFX950-NEXT:    ;;#ASMEND
@@ -902,14 +1066,24 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
 ; GFX10-LABEL: s_minimum_v2f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f64 v[0:1], s[18:19], s[22:23]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, s[18:19], s[22:23]
-; GFX10-NEXT:    v_min_f64 v[4:5], s[16:17], s[20:21]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, s[16:17], s[20:21]
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v1, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v0, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, 0, s5
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, s[22:23], s[22:23]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s8, s[20:21], s[20:21]
+; GFX10-NEXT:    s_and_b32 s4, s4, exec_lo
+; GFX10-NEXT:    s_cselect_b32 s5, s23, s19
+; GFX10-NEXT:    s_cselect_b32 s4, s22, s18
+; GFX10-NEXT:    v_cmp_u_f64_e64 s6, s[4:5], s[4:5]
+; GFX10-NEXT:    s_and_b32 s6, s6, exec_lo
+; GFX10-NEXT:    s_cselect_b32 s7, s5, s23
+; GFX10-NEXT:    s_cselect_b32 s6, s4, s22
+; GFX10-NEXT:    s_and_b32 s8, s8, exec_lo
+; GFX10-NEXT:    s_cselect_b32 s9, s21, s17
+; GFX10-NEXT:    s_cselect_b32 s8, s20, s16
+; GFX10-NEXT:    v_min_f64 v[2:3], s[4:5], s[6:7]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s10, s[8:9], s[8:9]
+; GFX10-NEXT:    s_and_b32 s10, s10, exec_lo
+; GFX10-NEXT:    s_cselect_b32 s11, s9, s21
+; GFX10-NEXT:    s_cselect_b32 s10, s8, s20
+; GFX10-NEXT:    v_min_f64 v[0:1], s[8:9], s[10:11]
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ; use v[0:3]
 ; GFX10-NEXT:    ;;#ASMEND
@@ -918,15 +1092,26 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
 ; GFX11-LABEL: s_minimum_v2f64:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f64 v[0:1], s[2:3], s[18:19]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s2, s[2:3], s[18:19]
-; GFX11-NEXT:    v_min_f64 v[4:5], s[0:1], s[16:17]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, s[0:1], s[16:17]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v1, 0x7ff80000, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v0, 0, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, s0
+; GFX11-NEXT:    v_cmp_u_f64_e64 s4, s[18:19], s[18:19]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s6, s[16:17], s[16:17]
+; GFX11-NEXT:    s_and_b32 s4, s4, exec_lo
+; GFX11-NEXT:    s_cselect_b32 s3, s19, s3
+; GFX11-NEXT:    s_cselect_b32 s2, s18, s2
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s4, s[2:3], s[2:3]
+; GFX11-NEXT:    s_and_b32 s4, s4, exec_lo
+; GFX11-NEXT:    s_cselect_b32 s5, s3, s19
+; GFX11-NEXT:    s_cselect_b32 s4, s2, s18
+; GFX11-NEXT:    s_and_b32 s6, s6, exec_lo
+; GFX11-NEXT:    s_cselect_b32 s1, s17, s1
+; GFX11-NEXT:    s_cselect_b32 s0, s16, s0
+; GFX11-NEXT:    v_min_f64 v[2:3], s[2:3], s[4:5]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s6, s[0:1], s[0:1]
+; GFX11-NEXT:    s_and_b32 s6, s6, exec_lo
+; GFX11-NEXT:    s_cselect_b32 s7, s1, s17
+; GFX11-NEXT:    s_cselect_b32 s6, s0, s16
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    v_min_f64 v[0:1], s[0:1], s[6:7]
 ; GFX11-NEXT:    ;;#ASMSTART
 ; GFX11-NEXT:    ; use v[0:3]
 ; GFX11-NEXT:    ;;#ASMEND
@@ -954,110 +1139,165 @@ define <3 x double> @v_minimum_v3f64(<3 x double> %src0, <3 x double> %src1) {
 ; GFX7-LABEL: v_minimum_v3f64:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT:    v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX7-NEXT:    v_min_f64 v[8:9], v[4:5], v[10:11]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[8:9], v[8:9]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[10:11], v[10:11]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, v2, v8, s[4:5]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[6:7]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v7, v9, v3, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v6, v8, v2, s[4:5]
+; GFX7-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v7, v11, v5, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v6, v10, v4, s[6:7]
+; GFX7-NEXT:    v_min_f64 v[4:5], v[4:5], v[6:7]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_v3f64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT:    v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX8-NEXT:    v_min_f64 v[8:9], v[4:5], v[10:11]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[8:9], v[8:9]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[10:11], v[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, v8, s[4:5]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[6:7]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v7, v9, v3, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v6, v8, v2, s[4:5]
+; GFX8-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v7, v11, v5, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v6, v10, v4, s[6:7]
+; GFX8-NEXT:    v_min_f64 v[4:5], v[4:5], v[6:7]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v3f64:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX900-NEXT:    v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX900-NEXT:    v_min_f64 v[8:9], v[4:5], v[10:11]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[8:9], v[8:9]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[10:11], v[10:11]
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, v2, v8, s[4:5]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX900-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[6:7]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
+; GFX900-NEXT:    v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v7, v9, v3, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v6, v8, v2, s[4:5]
+; GFX900-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v7, v11, v5, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v6, v10, v4, s[6:7]
+; GFX900-NEXT:    v_min_f64 v[4:5], v[4:5], v[6:7]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v3f64:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX950-NEXT:    v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX950-NEXT:    v_min_f64 v[0:1], v[0:1], v[6:7]
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX950-NEXT:    v_mov_b32_e32 v12, 0x7ff80000
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v13, v12, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v7, v12, vcc
-; GFX950-NEXT:    v_min_f64 v[6:7], v[4:5], v[10:11]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v9, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, v8, v2, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX950-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v5, v11, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v4, v6, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v5, v7, v12, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v11, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, v10, v4, vcc
+; GFX950-NEXT:    v_min_f64 v[4:5], v[4:5], v[6:7]
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v3f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT:    v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[8:9]
-; GFX10-NEXT:    v_min_f64 v[8:9], v[4:5], v[10:11]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[10:11]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v7, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v9, 0x7ff80000, s5
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[6:7], v[6:7]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[8:9], v[8:9]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[10:11], v[10:11]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v8, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s5
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[4:5]
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v9, v9, v3, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, v2, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, v11, v5, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, v10, v4, s5
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX10-NEXT:    v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX10-NEXT:    v_min_f64 v[4:5], v[4:5], v[10:11]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_v3f64:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT:    v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[8:9]
-; GFX11-NEXT:    v_min_f64 v[8:9], v[4:5], v[10:11]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[10:11]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v7, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[6:7], v[6:7]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[8:9], v[8:9]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[10:11], v[10:11]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v7 :: v_dual_cndmask_b32 v0, v0, v6
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, v8, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s1
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[2:3]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[4:5]
+; GFX11-NEXT:    v_dual_cndmask_b32 v7, v7, v1 :: v_dual_cndmask_b32 v6, v6, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cndmask_b32_e64 v9, v9, v3, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v8, v8, v2, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v11, v11, v5, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v10, v10, v4, s1
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX11-NEXT:    v_min_f64 v[4:5], v[4:5], v[10:11]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v3f64:
@@ -1135,110 +1375,165 @@ define <3 x double> @v_minimum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1
 ; GFX7-LABEL: v_minimum_v3f64__nsz:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT:    v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX7-NEXT:    v_min_f64 v[8:9], v[4:5], v[10:11]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[8:9], v[8:9]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[10:11], v[10:11]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, v2, v8, s[4:5]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[6:7]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v7, v9, v3, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v6, v8, v2, s[4:5]
+; GFX7-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v7, v11, v5, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v6, v10, v4, s[6:7]
+; GFX7-NEXT:    v_min_f64 v[4:5], v[4:5], v[6:7]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_v3f64__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT:    v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX8-NEXT:    v_min_f64 v[8:9], v[4:5], v[10:11]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[8:9], v[8:9]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[10:11], v[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, v8, s[4:5]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[6:7]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v7, v9, v3, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v6, v8, v2, s[4:5]
+; GFX8-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v7, v11, v5, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v6, v10, v4, s[6:7]
+; GFX8-NEXT:    v_min_f64 v[4:5], v[4:5], v[6:7]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v3f64__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX900-NEXT:    v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX900-NEXT:    v_min_f64 v[8:9], v[4:5], v[10:11]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[8:9], v[8:9]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[10:11], v[10:11]
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, v2, v8, s[4:5]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX900-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[6:7]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
+; GFX900-NEXT:    v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v7, v9, v3, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v6, v8, v2, s[4:5]
+; GFX900-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v7, v11, v5, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v6, v10, v4, s[6:7]
+; GFX900-NEXT:    v_min_f64 v[4:5], v[4:5], v[6:7]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v3f64__nsz:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX950-NEXT:    v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX950-NEXT:    v_min_f64 v[0:1], v[0:1], v[6:7]
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX950-NEXT:    v_mov_b32_e32 v12, 0x7ff80000
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v13, v12, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v7, v12, vcc
-; GFX950-NEXT:    v_min_f64 v[6:7], v[4:5], v[10:11]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v9, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, v8, v2, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX950-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v5, v11, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v4, v6, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v5, v7, v12, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v11, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, v10, v4, vcc
+; GFX950-NEXT:    v_min_f64 v[4:5], v[4:5], v[6:7]
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v3f64__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT:    v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[8:9]
-; GFX10-NEXT:    v_min_f64 v[8:9], v[4:5], v[10:11]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[10:11]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v7, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v9, 0x7ff80000, s5
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[6:7], v[6:7]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[8:9], v[8:9]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[10:11], v[10:11]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v8, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s5
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[4:5]
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v9, v9, v3, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, v2, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, v11, v5, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, v10, v4, s5
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX10-NEXT:    v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX10-NEXT:    v_min_f64 v[4:5], v[4:5], v[10:11]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_v3f64__nsz:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT:    v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[8:9]
-; GFX11-NEXT:    v_min_f64 v[8:9], v[4:5], v[10:11]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[10:11]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v7, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[6:7], v[6:7]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[8:9], v[8:9]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[10:11], v[10:11]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v7 :: v_dual_cndmask_b32 v0, v0, v6
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, v8, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s1
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[2:3]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[4:5]
+; GFX11-NEXT:    v_dual_cndmask_b32 v7, v7, v1 :: v_dual_cndmask_b32 v6, v6, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cndmask_b32_e64 v9, v9, v3, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v8, v8, v2, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v11, v11, v5, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v10, v10, v4, s1
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX11-NEXT:    v_min_f64 v[4:5], v[4:5], v[10:11]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v3f64__nsz:
@@ -1316,135 +1611,209 @@ define <4 x double> @v_minimum_v4f64(<4 x double> %src0, <4 x double> %src1) {
 ; GFX7-LABEL: v_minimum_v4f64:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX7-NEXT:    v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX7-NEXT:    v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX7-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[10:11], v[10:11]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[12:13], v[12:13]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s[4:5]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v4, v4, v12, s[6:7]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e64 v9, v11, v3, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v8, v10, v2, s[4:5]
+; GFX7-NEXT:    v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e64 v9, v13, v5, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v8, v12, v4, s[6:7]
+; GFX7-NEXT:    v_min_f64 v[4:5], v[4:5], v[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v9, v15, v7, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX7-NEXT:    v_min_f64 v[6:7], v[6:7], v[8:9]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_v4f64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX8-NEXT:    v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX8-NEXT:    v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX8-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[10:11], v[10:11]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[12:13], v[12:13]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s[4:5]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, v4, v12, s[6:7]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e64 v9, v11, v3, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v8, v10, v2, s[4:5]
+; GFX8-NEXT:    v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e64 v9, v13, v5, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v8, v12, v4, s[6:7]
+; GFX8-NEXT:    v_min_f64 v[4:5], v[4:5], v[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v9, v15, v7, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX8-NEXT:    v_min_f64 v[6:7], v[6:7], v[8:9]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v4f64:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX900-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX900-NEXT:    v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX900-NEXT:    v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX900-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX900-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[10:11], v[10:11]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[12:13], v[12:13]
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s[4:5]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX900-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v4, v4, v12, s[6:7]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX900-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e64 v9, v11, v3, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v8, v10, v2, s[4:5]
+; GFX900-NEXT:    v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e64 v9, v13, v5, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v8, v12, v4, s[6:7]
+; GFX900-NEXT:    v_min_f64 v[4:5], v[4:5], v[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v15, v7, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX900-NEXT:    v_min_f64 v[6:7], v[6:7], v[8:9]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v4f64:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX950-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX950-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX950-NEXT:    v_mov_b32_e32 v16, 0x7ff80000
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v17, v16, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v8, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v9, v16, vcc
-; GFX950-NEXT:    v_min_f64 v[8:9], v[4:5], v[12:13]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v11, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v10, v2, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX950-NEXT:    v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v5, v13, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v4, v8, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v5, v9, v16, vcc
-; GFX950-NEXT:    v_min_f64 v[8:9], v[6:7], v[14:15]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v13, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v12, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX950-NEXT:    v_min_f64 v[4:5], v[4:5], v[8:9]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v7, v9, v16, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v15, v7, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX950-NEXT:    v_min_f64 v[6:7], v[6:7], v[8:9]
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v4f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[10:11]
-; GFX10-NEXT:    v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[12:13]
-; GFX10-NEXT:    v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[6:7], v[14:15]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v9, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v11, 0x7ff80000, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s6
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, v13, 0x7ff80000, s6
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[8:9], v[8:9]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[10:11], v[10:11]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[12:13], v[12:13]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[14:15], v[14:15]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s4
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, v15, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, v4, v12, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v14, s6
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[4:5]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[6:7], v[6:7]
+; GFX10-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, v11, v3, s4
+; GFX10-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v13, v13, v5, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, v10, v2, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v15, v15, v7, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v12, v12, v4, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v14, v14, v6, s6
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX10-NEXT:    v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX10-NEXT:    v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX10-NEXT:    v_min_f64 v[6:7], v[6:7], v[14:15]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_v4f64:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[10:11]
-; GFX11-NEXT:    v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[12:13]
-; GFX11-NEXT:    v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[6:7], v[14:15]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v9, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, v11, 0x7ff80000, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[8:9], v[8:9]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[10:11], v[10:11]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[12:13], v[12:13]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[14:15], v[14:15]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v9 :: v_dual_cndmask_b32 v0, v0, v8
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v7, v7, v15, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, v4, v12, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v6, v6, v14, s2
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[4:5]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[6:7], v[6:7]
+; GFX11-NEXT:    v_dual_cndmask_b32 v9, v9, v1 :: v_dual_cndmask_b32 v8, v8, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cndmask_b32_e64 v11, v11, v3, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v13, v13, v5, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v10, v10, v2, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v15, v15, v7, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v12, v12, v4, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v14, v14, v6, s2
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX11-NEXT:    v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX11-NEXT:    v_min_f64 v[6:7], v[6:7], v[14:15]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v4f64:
@@ -1529,135 +1898,209 @@ define <4 x double> @v_minimum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1
 ; GFX7-LABEL: v_minimum_v4f64__nsz:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX7-NEXT:    v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX7-NEXT:    v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX7-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[10:11], v[10:11]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[12:13], v[12:13]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s[4:5]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v4, v4, v12, s[6:7]
+; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e64 v9, v11, v3, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v8, v10, v2, s[4:5]
+; GFX7-NEXT:    v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e64 v9, v13, v5, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v8, v12, v4, s[6:7]
+; GFX7-NEXT:    v_min_f64 v[4:5], v[4:5], v[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v9, v15, v7, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX7-NEXT:    v_min_f64 v[6:7], v[6:7], v[8:9]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_v4f64__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX8-NEXT:    v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX8-NEXT:    v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX8-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[10:11], v[10:11]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[12:13], v[12:13]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s[4:5]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, v4, v12, s[6:7]
+; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e64 v9, v11, v3, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v8, v10, v2, s[4:5]
+; GFX8-NEXT:    v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e64 v9, v13, v5, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v8, v12, v4, s[6:7]
+; GFX8-NEXT:    v_min_f64 v[4:5], v[4:5], v[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v9, v15, v7, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX8-NEXT:    v_min_f64 v[6:7], v[6:7], v[8:9]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v4f64__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX900-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX900-NEXT:    v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX900-NEXT:    v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX900-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX900-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[10:11], v[10:11]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[12:13], v[12:13]
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s[4:5]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[2:3]
+; GFX900-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v4, v4, v12, s[6:7]
+; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX900-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e64 v9, v11, v3, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v8, v10, v2, s[4:5]
+; GFX900-NEXT:    v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e64 v9, v13, v5, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v8, v12, v4, s[6:7]
+; GFX900-NEXT:    v_min_f64 v[4:5], v[4:5], v[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v15, v7, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX900-NEXT:    v_min_f64 v[6:7], v[6:7], v[8:9]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v4f64__nsz:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX950-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX950-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX950-NEXT:    v_mov_b32_e32 v16, 0x7ff80000
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v17, v16, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v8, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v9, v16, vcc
-; GFX950-NEXT:    v_min_f64 v[8:9], v[4:5], v[12:13]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v11, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v10, v2, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX950-NEXT:    v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v5, v13, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v4, v8, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v5, v9, v16, vcc
-; GFX950-NEXT:    v_min_f64 v[8:9], v[6:7], v[14:15]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v13, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v12, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX950-NEXT:    v_min_f64 v[4:5], v[4:5], v[8:9]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v7, v9, v16, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v15, v7, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX950-NEXT:    v_min_f64 v[6:7], v[6:7], v[8:9]
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v4f64__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[10:11]
-; GFX10-NEXT:    v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[12:13]
-; GFX10-NEXT:    v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[6:7], v[14:15]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v9, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v11, 0x7ff80000, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s6
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, v13, 0x7ff80000, s6
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[8:9], v[8:9]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[10:11], v[10:11]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[12:13], v[12:13]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[14:15], v[14:15]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s4
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, v15, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, v4, v12, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v14, s6
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[4:5]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[6:7], v[6:7]
+; GFX10-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, v11, v3, s4
+; GFX10-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v13, v13, v5, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, v10, v2, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v15, v15, v7, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v12, v12, v4, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v14, v14, v6, s6
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX10-NEXT:    v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX10-NEXT:    v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX10-NEXT:    v_min_f64 v[6:7], v[6:7], v[14:15]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_v4f64__nsz:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[10:11]
-; GFX11-NEXT:    v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[12:13]
-; GFX11-NEXT:    v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[6:7], v[14:15]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v9, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, v11, 0x7ff80000, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[8:9], v[8:9]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[10:11], v[10:11]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[12:13], v[12:13]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[14:15], v[14:15]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v9 :: v_dual_cndmask_b32 v0, v0, v8
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v7, v7, v15, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, v4, v12, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v6, v6, v14, s2
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[4:5]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[6:7], v[6:7]
+; GFX11-NEXT:    v_dual_cndmask_b32 v9, v9, v1 :: v_dual_cndmask_b32 v8, v8, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cndmask_b32_e64 v11, v11, v3, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v13, v13, v5, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v10, v10, v2, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v15, v15, v7, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v12, v12, v4, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v14, v14, v6, s2
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX11-NEXT:    v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX11-NEXT:    v_min_f64 v[6:7], v[6:7], v[14:15]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v4f64__nsz:
@@ -1742,244 +2185,395 @@ define <8 x double> @v_minimum_v8f64(<8 x double> %src0, <8 x double> %src1) {
 ; GFX7-LABEL: v_minimum_v8f64:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[16:17]
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX7-NEXT:    v_min_f64 v[32:33], v[0:1], v[16:17]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX7-NEXT:    v_min_f64 v[16:17], v[2:3], v[18:19]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[18:19]
-; GFX7-NEXT:    v_mov_b32_e32 v34, 0x7ff80000
-; GFX7-NEXT:    v_min_f64 v[18:19], v[4:5], v[20:21]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[20:21]
-; GFX7-NEXT:    v_min_f64 v[20:21], v[6:7], v[22:23]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[22:23]
-; GFX7-NEXT:    v_min_f64 v[22:23], v[8:9], v[24:25]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
-; GFX7-NEXT:    v_min_f64 v[24:25], v[10:11], v[26:27]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
-; GFX7-NEXT:    v_min_f64 v[26:27], v[12:13], v[28:29]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v34, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v16, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v17, v34, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v18, 0, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v19, v34, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v6, v20, 0, s[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e64 v7, v21, v34, s[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e64 v8, v22, 0, s[10:11]
-; GFX7-NEXT:    v_cndmask_b32_e64 v9, v23, v34, s[10:11]
-; GFX7-NEXT:    v_cndmask_b32_e64 v10, v24, 0, s[12:13]
-; GFX7-NEXT:    v_cndmask_b32_e64 v11, v25, v34, s[12:13]
-; GFX7-NEXT:    v_cndmask_b32_e64 v12, v26, 0, s[14:15]
-; GFX7-NEXT:    v_cndmask_b32_e64 v13, v27, v34, s[14:15]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v17, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v16, v16, v0, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[18:19]
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[16:17]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v19, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v18, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e32 v19, v19, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v18, v18, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[20:21]
+; GFX7-NEXT:    v_min_f64 v[2:3], v[2:3], v[18:19]
+; GFX7-NEXT:    v_cndmask_b32_e32 v5, v5, v21, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v4, v20, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v21, v21, v5, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v20, v20, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[22:23]
+; GFX7-NEXT:    v_min_f64 v[4:5], v[4:5], v[20:21]
+; GFX7-NEXT:    v_cndmask_b32_e32 v7, v7, v23, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v6, v6, v22, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v23, v23, v7, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v22, v22, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[24:25]
+; GFX7-NEXT:    v_min_f64 v[6:7], v[6:7], v[22:23]
+; GFX7-NEXT:    v_cndmask_b32_e32 v9, v9, v25, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v8, v24, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e32 v25, v25, v9, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v24, v24, v8, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[26:27]
+; GFX7-NEXT:    v_min_f64 v[8:9], v[8:9], v[24:25]
+; GFX7-NEXT:    v_cndmask_b32_e32 v11, v11, v27, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v10, v10, v26, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX7-NEXT:    v_cndmask_b32_e32 v27, v27, v11, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v26, v26, v10, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[28:29]
+; GFX7-NEXT:    v_min_f64 v[10:11], v[10:11], v[26:27]
+; GFX7-NEXT:    v_cndmask_b32_e32 v13, v13, v29, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v12, v12, v28, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX7-NEXT:    v_cndmask_b32_e32 v29, v29, v13, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v28, v28, v12, vcc
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_min_f64 v[16:17], v[14:15], v[30:31]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
-; GFX7-NEXT:    v_cndmask_b32_e64 v14, v16, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v15, v17, v34, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[30:31], v[30:31]
+; GFX7-NEXT:    v_min_f64 v[12:13], v[12:13], v[28:29]
+; GFX7-NEXT:    v_cndmask_b32_e32 v15, v15, v31, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v14, v14, v30, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v31, v15, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v16, v30, v14, vcc
+; GFX7-NEXT:    v_min_f64 v[14:15], v[14:15], v[16:17]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_v8f64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[16:17]
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX8-NEXT:    v_min_f64 v[32:33], v[0:1], v[16:17]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX8-NEXT:    v_min_f64 v[16:17], v[2:3], v[18:19]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[18:19]
-; GFX8-NEXT:    v_mov_b32_e32 v34, 0x7ff80000
-; GFX8-NEXT:    v_min_f64 v[18:19], v[4:5], v[20:21]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[20:21]
-; GFX8-NEXT:    v_min_f64 v[20:21], v[6:7], v[22:23]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[22:23]
-; GFX8-NEXT:    v_min_f64 v[22:23], v[8:9], v[24:25]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
-; GFX8-NEXT:    v_min_f64 v[24:25], v[10:11], v[26:27]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
-; GFX8-NEXT:    v_min_f64 v[26:27], v[12:13], v[28:29]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v34, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v16, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v17, v34, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v18, 0, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v19, v34, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, v20, 0, s[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, v21, v34, s[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e64 v8, v22, 0, s[10:11]
-; GFX8-NEXT:    v_cndmask_b32_e64 v9, v23, v34, s[10:11]
-; GFX8-NEXT:    v_cndmask_b32_e64 v10, v24, 0, s[12:13]
-; GFX8-NEXT:    v_cndmask_b32_e64 v11, v25, v34, s[12:13]
-; GFX8-NEXT:    v_cndmask_b32_e64 v12, v26, 0, s[14:15]
-; GFX8-NEXT:    v_cndmask_b32_e64 v13, v27, v34, s[14:15]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v17, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v16, v16, v0, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[18:19]
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[16:17]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v19, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v18, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e32 v19, v19, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v18, v18, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[20:21]
+; GFX8-NEXT:    v_min_f64 v[2:3], v[2:3], v[18:19]
+; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v21, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v20, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v21, v21, v5, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v20, v20, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[22:23]
+; GFX8-NEXT:    v_min_f64 v[4:5], v[4:5], v[20:21]
+; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v23, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v22, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v23, v23, v7, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v22, v22, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[24:25]
+; GFX8-NEXT:    v_min_f64 v[6:7], v[6:7], v[22:23]
+; GFX8-NEXT:    v_cndmask_b32_e32 v9, v9, v25, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v8, v24, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e32 v25, v25, v9, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v24, v24, v8, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[26:27]
+; GFX8-NEXT:    v_min_f64 v[8:9], v[8:9], v[24:25]
+; GFX8-NEXT:    v_cndmask_b32_e32 v11, v11, v27, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v10, v10, v26, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e32 v27, v27, v11, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v26, v26, v10, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[28:29]
+; GFX8-NEXT:    v_min_f64 v[10:11], v[10:11], v[26:27]
+; GFX8-NEXT:    v_cndmask_b32_e32 v13, v13, v29, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v12, v12, v28, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX8-NEXT:    v_cndmask_b32_e32 v29, v29, v13, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v28, v28, v12, vcc
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_min_f64 v[16:17], v[14:15], v[30:31]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
-; GFX8-NEXT:    v_cndmask_b32_e64 v14, v16, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v15, v17, v34, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[30:31], v[30:31]
+; GFX8-NEXT:    v_min_f64 v[12:13], v[12:13], v[28:29]
+; GFX8-NEXT:    v_cndmask_b32_e32 v15, v15, v31, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v14, v14, v30, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v31, v15, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v16, v30, v14, vcc
+; GFX8-NEXT:    v_min_f64 v[14:15], v[14:15], v[16:17]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v8f64:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[16:17]
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX900-NEXT:    v_min_f64 v[32:33], v[0:1], v[16:17]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX900-NEXT:    v_min_f64 v[16:17], v[2:3], v[18:19]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[18:19]
-; GFX900-NEXT:    v_mov_b32_e32 v34, 0x7ff80000
-; GFX900-NEXT:    v_min_f64 v[18:19], v[4:5], v[20:21]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[20:21]
-; GFX900-NEXT:    v_min_f64 v[20:21], v[6:7], v[22:23]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[22:23]
-; GFX900-NEXT:    v_min_f64 v[22:23], v[8:9], v[24:25]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
-; GFX900-NEXT:    v_min_f64 v[24:25], v[10:11], v[26:27]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
-; GFX900-NEXT:    v_min_f64 v[26:27], v[12:13], v[28:29]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v33, v34, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v16, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v17, v34, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v18, 0, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v19, v34, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v6, v20, 0, s[8:9]
-; GFX900-NEXT:    v_cndmask_b32_e64 v7, v21, v34, s[8:9]
-; GFX900-NEXT:    v_cndmask_b32_e64 v8, v22, 0, s[10:11]
-; GFX900-NEXT:    v_cndmask_b32_e64 v9, v23, v34, s[10:11]
-; GFX900-NEXT:    v_cndmask_b32_e64 v10, v24, 0, s[12:13]
-; GFX900-NEXT:    v_cndmask_b32_e64 v11, v25, v34, s[12:13]
-; GFX900-NEXT:    v_cndmask_b32_e64 v12, v26, 0, s[14:15]
-; GFX900-NEXT:    v_cndmask_b32_e64 v13, v27, v34, s[14:15]
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v17, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v16, v16, v0, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[18:19]
+; GFX900-NEXT:    v_min_f64 v[0:1], v[0:1], v[16:17]
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v19, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v18, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX900-NEXT:    v_cndmask_b32_e32 v19, v19, v3, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v18, v18, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[20:21]
+; GFX900-NEXT:    v_min_f64 v[2:3], v[2:3], v[18:19]
+; GFX900-NEXT:    v_cndmask_b32_e32 v5, v5, v21, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v20, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e32 v21, v21, v5, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v20, v20, v4, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[22:23]
+; GFX900-NEXT:    v_min_f64 v[4:5], v[4:5], v[20:21]
+; GFX900-NEXT:    v_cndmask_b32_e32 v7, v7, v23, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v6, v6, v22, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e32 v23, v23, v7, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v22, v22, v6, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[24:25]
+; GFX900-NEXT:    v_min_f64 v[6:7], v[6:7], v[22:23]
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v25, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v24, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e32 v25, v25, v9, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v24, v24, v8, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[26:27]
+; GFX900-NEXT:    v_min_f64 v[8:9], v[8:9], v[24:25]
+; GFX900-NEXT:    v_cndmask_b32_e32 v11, v11, v27, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v10, v10, v26, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX900-NEXT:    v_cndmask_b32_e32 v27, v27, v11, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v26, v26, v10, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[28:29]
+; GFX900-NEXT:    v_min_f64 v[10:11], v[10:11], v[26:27]
+; GFX900-NEXT:    v_cndmask_b32_e32 v13, v13, v29, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v12, v12, v28, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX900-NEXT:    v_cndmask_b32_e32 v29, v29, v13, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v28, v28, v12, vcc
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_min_f64 v[16:17], v[14:15], v[30:31]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
-; GFX900-NEXT:    v_cndmask_b32_e64 v14, v16, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v15, v17, v34, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[30:31], v[30:31]
+; GFX900-NEXT:    v_min_f64 v[12:13], v[12:13], v[28:29]
+; GFX900-NEXT:    v_cndmask_b32_e32 v15, v15, v31, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v14, v14, v30, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v31, v15, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v16, v30, v14, vcc
+; GFX900-NEXT:    v_min_f64 v[14:15], v[14:15], v[16:17]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v8f64:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    scratch_load_dword v31, off, s32
-; GFX950-NEXT:    v_mov_b32_e32 v54, 0x7ff80000
-; GFX950-NEXT:    v_min_f64 v[32:33], v[0:1], v[16:17]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX950-NEXT:    v_min_f64 v[34:35], v[2:3], v[18:19]
-; GFX950-NEXT:    v_min_f64 v[36:37], v[4:5], v[20:21]
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v33, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
-; GFX950-NEXT:    v_min_f64 v[38:39], v[6:7], v[22:23]
-; GFX950-NEXT:    v_min_f64 v[48:49], v[8:9], v[24:25]
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v34, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v35, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
-; GFX950-NEXT:    v_min_f64 v[50:51], v[10:11], v[26:27]
-; GFX950-NEXT:    v_min_f64 v[52:53], v[12:13], v[28:29]
-; GFX950-NEXT:    v_cndmask_b32_e64 v4, v36, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v5, v37, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
-; GFX950-NEXT:    s_waitcnt vmcnt(0)
-; GFX950-NEXT:    v_min_f64 v[16:17], v[14:15], v[30:31]
-; GFX950-NEXT:    v_cndmask_b32_e64 v6, v38, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v7, v39, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[24:25]
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[16:17]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v17, v17, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v16, v16, v0, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[18:19]
+; GFX950-NEXT:    v_min_f64 v[0:1], v[0:1], v[16:17]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v3, v19, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v2, v18, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v17, v19, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v16, v18, v2, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[20:21]
+; GFX950-NEXT:    v_min_f64 v[2:3], v[2:3], v[16:17]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v5, v21, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, v4, v20, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v17, v21, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v16, v20, v4, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[22:23]
+; GFX950-NEXT:    v_min_f64 v[4:5], v[4:5], v[16:17]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v7, v23, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, v6, v22, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v17, v23, v7, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v16, v22, v6, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[24:25]
+; GFX950-NEXT:    v_min_f64 v[6:7], v[6:7], v[16:17]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v9, v25, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v8, v24, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v8, v48, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v9, v49, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
+; GFX950-NEXT:    v_cndmask_b32_e32 v17, v25, v9, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v16, v24, v8, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[26:27]
+; GFX950-NEXT:    v_min_f64 v[8:9], v[8:9], v[16:17]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v11, v11, v27, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v10, v10, v26, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v10, v50, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v11, v51, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
+; GFX950-NEXT:    v_cndmask_b32_e32 v17, v27, v11, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v16, v26, v10, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[28:29]
+; GFX950-NEXT:    v_min_f64 v[10:11], v[10:11], v[16:17]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v13, v13, v29, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v12, v12, v28, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v17, v29, v13, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v16, v28, v12, vcc
+; GFX950-NEXT:    v_min_f64 v[12:13], v[12:13], v[16:17]
+; GFX950-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[30:31], v[30:31]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v12, v52, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v13, v53, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX950-NEXT:    v_cndmask_b32_e32 v15, v15, v31, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v14, v14, v30, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v14, v16, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v15, v17, v54, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v17, v31, v15, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v16, v30, v14, vcc
+; GFX950-NEXT:    v_min_f64 v[14:15], v[14:15], v[16:17]
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v8f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX10-NEXT:    v_min_f64 v[32:33], v[0:1], v[16:17]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
-; GFX10-NEXT:    v_min_f64 v[16:17], v[2:3], v[18:19]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[18:19]
-; GFX10-NEXT:    v_min_f64 v[18:19], v[4:5], v[20:21]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[20:21]
-; GFX10-NEXT:    v_min_f64 v[20:21], v[6:7], v[22:23]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[6:7], v[22:23]
-; GFX10-NEXT:    v_min_f64 v[22:23], v[8:9], v[24:25]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s7, v[8:9], v[24:25]
-; GFX10-NEXT:    v_min_f64 v[24:25], v[10:11], v[26:27]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s8, v[10:11], v[26:27]
-; GFX10-NEXT:    v_min_f64 v[26:27], v[12:13], v[28:29]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s9, v[12:13], v[28:29]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v33, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v16, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v17, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v18, 0, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v19, 0x7ff80000, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, v20, 0, s6
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, v21, 0x7ff80000, s6
-; GFX10-NEXT:    v_cndmask_b32_e64 v8, v22, 0, s7
-; GFX10-NEXT:    v_cndmask_b32_e64 v9, v23, 0x7ff80000, s7
-; GFX10-NEXT:    v_cndmask_b32_e64 v10, v24, 0, s8
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, v25, 0x7ff80000, s8
-; GFX10-NEXT:    v_cndmask_b32_e64 v12, v26, 0, s9
-; GFX10-NEXT:    v_cndmask_b32_e64 v13, v27, 0x7ff80000, s9
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[16:17], v[16:17]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[18:19], v[18:19]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[20:21], v[20:21]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[22:23], v[22:23]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s7, v[24:25], v[24:25]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s8, v[26:27], v[26:27]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s9, v[28:29], v[28:29]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v19, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v21, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, v23, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v9, v9, v25, s7
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, v11, v27, s8
+; GFX10-NEXT:    v_cndmask_b32_e64 v13, v13, v29, s9
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v18, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, v4, v20, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v22, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, v24, s7
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, v10, v26, s8
+; GFX10-NEXT:    v_cndmask_b32_e64 v12, v12, v28, s9
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[4:5]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[6:7], v[6:7]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s7, v[8:9], v[8:9]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s8, v[10:11], v[10:11]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s9, v[12:13], v[12:13]
+; GFX10-NEXT:    v_cndmask_b32_e32 v17, v17, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v16, v16, v0, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v19, v19, v3, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v21, v21, v5, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v23, v23, v7, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v25, v25, v9, s7
+; GFX10-NEXT:    v_cndmask_b32_e64 v27, v27, v11, s8
+; GFX10-NEXT:    v_cndmask_b32_e64 v29, v29, v13, s9
+; GFX10-NEXT:    v_cndmask_b32_e64 v18, v18, v2, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v20, v20, v4, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v22, v22, v6, s6
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[16:17]
+; GFX10-NEXT:    v_cndmask_b32_e64 v24, v24, v8, s7
+; GFX10-NEXT:    v_cndmask_b32_e64 v26, v26, v10, s8
+; GFX10-NEXT:    v_cndmask_b32_e64 v28, v28, v12, s9
+; GFX10-NEXT:    v_min_f64 v[2:3], v[2:3], v[18:19]
+; GFX10-NEXT:    v_min_f64 v[4:5], v[4:5], v[20:21]
+; GFX10-NEXT:    v_min_f64 v[6:7], v[6:7], v[22:23]
+; GFX10-NEXT:    v_min_f64 v[8:9], v[8:9], v[24:25]
+; GFX10-NEXT:    v_min_f64 v[10:11], v[10:11], v[26:27]
+; GFX10-NEXT:    v_min_f64 v[12:13], v[12:13], v[28:29]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_min_f64 v[28:29], v[14:15], v[30:31]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s10, v[14:15], v[30:31]
-; GFX10-NEXT:    v_cndmask_b32_e64 v14, v28, 0, s10
-; GFX10-NEXT:    v_cndmask_b32_e64 v15, v29, 0x7ff80000, s10
+; GFX10-NEXT:    v_cmp_u_f64_e64 s10, v[30:31], v[30:31]
+; GFX10-NEXT:    v_cndmask_b32_e64 v15, v15, v31, s10
+; GFX10-NEXT:    v_cndmask_b32_e64 v14, v14, v30, s10
+; GFX10-NEXT:    v_cmp_u_f64_e64 s10, v[14:15], v[14:15]
+; GFX10-NEXT:    v_cndmask_b32_e64 v17, v31, v15, s10
+; GFX10-NEXT:    v_cndmask_b32_e64 v16, v30, v14, s10
+; GFX10-NEXT:    v_min_f64 v[14:15], v[14:15], v[16:17]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_v8f64:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    scratch_load_b32 v31, off, s32
-; GFX11-NEXT:    v_min_f64 v[32:33], v[0:1], v[16:17]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
-; GFX11-NEXT:    v_min_f64 v[16:17], v[2:3], v[18:19]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[18:19]
-; GFX11-NEXT:    v_min_f64 v[18:19], v[4:5], v[20:21]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[20:21]
-; GFX11-NEXT:    v_min_f64 v[20:21], v[6:7], v[22:23]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[6:7], v[22:23]
-; GFX11-NEXT:    v_min_f64 v[22:23], v[8:9], v[24:25]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s3, v[8:9], v[24:25]
-; GFX11-NEXT:    v_min_f64 v[24:25], v[10:11], v[26:27]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s4, v[10:11], v[26:27]
-; GFX11-NEXT:    v_min_f64 v[26:27], v[12:13], v[28:29]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s5, v[12:13], v[28:29]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v33, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v16, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v17, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v18, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, v19, 0x7ff80000, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v6, v20, 0, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v7, v21, 0x7ff80000, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v8, v22, 0, s3
-; GFX11-NEXT:    v_cndmask_b32_e64 v9, v23, 0x7ff80000, s3
-; GFX11-NEXT:    v_cndmask_b32_e64 v10, v24, 0, s4
-; GFX11-NEXT:    v_cndmask_b32_e64 v11, v25, 0x7ff80000, s4
-; GFX11-NEXT:    v_cndmask_b32_e64 v12, v26, 0, s5
-; GFX11-NEXT:    v_cndmask_b32_e64 v13, v27, 0x7ff80000, s5
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[16:17], v[16:17]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[18:19], v[18:19]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[20:21], v[20:21]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[22:23], v[22:23]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s3, v[24:25], v[24:25]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s4, v[26:27], v[26:27]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s5, v[28:29], v[28:29]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v17 :: v_dual_cndmask_b32 v0, v0, v16
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, v19, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, v5, v21, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v7, v7, v23, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v9, v9, v25, s3
+; GFX11-NEXT:    v_cndmask_b32_e64 v11, v11, v27, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v13, v13, v29, s5
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, v18, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, v4, v20, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v6, v6, v22, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v8, v8, v24, s3
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_cndmask_b32_e64 v10, v10, v26, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v12, v12, v28, s5
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[2:3]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[4:5]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[6:7], v[6:7]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s3, v[8:9], v[8:9]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s4, v[10:11], v[10:11]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s5, v[12:13], v[12:13]
+; GFX11-NEXT:    v_dual_cndmask_b32 v17, v17, v1 :: v_dual_cndmask_b32 v16, v16, v0
+; GFX11-NEXT:    v_cndmask_b32_e64 v19, v19, v3, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v21, v21, v5, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v23, v23, v7, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v25, v25, v9, s3
+; GFX11-NEXT:    v_cndmask_b32_e64 v27, v27, v11, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v29, v29, v13, s5
+; GFX11-NEXT:    v_cndmask_b32_e64 v18, v18, v2, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v20, v20, v4, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v22, v22, v6, s2
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[16:17]
+; GFX11-NEXT:    v_cndmask_b32_e64 v24, v24, v8, s3
+; GFX11-NEXT:    v_cndmask_b32_e64 v26, v26, v10, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v28, v28, v12, s5
+; GFX11-NEXT:    v_min_f64 v[2:3], v[2:3], v[18:19]
+; GFX11-NEXT:    v_min_f64 v[4:5], v[4:5], v[20:21]
+; GFX11-NEXT:    v_min_f64 v[6:7], v[6:7], v[22:23]
+; GFX11-NEXT:    v_min_f64 v[8:9], v[8:9], v[24:25]
+; GFX11-NEXT:    v_min_f64 v[10:11], v[10:11], v[26:27]
+; GFX11-NEXT:    v_min_f64 v[12:13], v[12:13], v[28:29]
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_min_f64 v[28:29], v[14:15], v[30:31]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s6, v[14:15], v[30:31]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v14, v28, 0, s6
-; GFX11-NEXT:    v_cndmask_b32_e64 v15, v29, 0x7ff80000, s6
+; GFX11-NEXT:    v_cmp_u_f64_e64 s6, v[30:31], v[30:31]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e64 v15, v15, v31, s6
+; GFX11-NEXT:    v_cndmask_b32_e64 v14, v14, v30, s6
+; GFX11-NEXT:    v_cmp_u_f64_e64 s6, v[14:15], v[14:15]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e64 v17, v31, v15, s6
+; GFX11-NEXT:    v_cndmask_b32_e64 v16, v30, v14, s6
+; GFX11-NEXT:    v_min_f64 v[14:15], v[14:15], v[16:17]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v8f64:
@@ -2010,118 +2604,165 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:8
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX7-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:16
+; GFX7-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:12
+; GFX7-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:24
+; GFX7-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:20
+; GFX7-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:32
+; GFX7-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:28
+; GFX7-NEXT:    buffer_load_dword v49, off, s[0:3], s32 offset:40
+; GFX7-NEXT:    buffer_load_dword v48, off, s[0:3], s32 offset:36
+; GFX7-NEXT:    buffer_load_dword v51, off, s[0:3], s32 offset:48
+; GFX7-NEXT:    buffer_load_dword v50, off, s[0:3], s32 offset:44
+; GFX7-NEXT:    buffer_load_dword v53, off, s[0:3], s32 offset:56
+; GFX7-NEXT:    buffer_load_dword v52, off, s[0:3], s32 offset:52
+; GFX7-NEXT:    s_waitcnt vmcnt(12)
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v32, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v31, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v32, v32, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v31, v31, v0, vcc
+; GFX7-NEXT:    s_waitcnt vmcnt(10)
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[33:34], v[33:34]
 ; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[31:32]
-; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:16
-; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:12
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
-; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
-; GFX7-NEXT:    v_min_f64 v[2:3], v[2:3], v[31:32]
-; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:24
-; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:20
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v2, 0, s[4:5]
-; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
-; GFX7-NEXT:    v_min_f64 v[4:5], v[4:5], v[31:32]
-; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:32
-; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:28
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v4, 0, s[6:7]
-; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
-; GFX7-NEXT:    v_min_f64 v[6:7], v[6:7], v[31:32]
-; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:36
-; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:40
-; GFX7-NEXT:    v_cndmask_b32_e64 v6, v6, 0, s[8:9]
-; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
-; GFX7-NEXT:    v_min_f64 v[8:9], v[8:9], v[31:32]
-; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:48
-; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:44
-; GFX7-NEXT:    v_cndmask_b32_e64 v8, v8, 0, s[10:11]
-; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
-; GFX7-NEXT:    v_min_f64 v[10:11], v[10:11], v[31:32]
-; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:56
-; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX7-NEXT:    v_cndmask_b32_e64 v10, v10, 0, s[12:13]
-; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
-; GFX7-NEXT:    v_min_f64 v[12:13], v[12:13], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:64
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:60
-; GFX7-NEXT:    v_cndmask_b32_e64 v12, v12, 0, s[14:15]
-; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v34, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v33, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e32 v34, v34, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v33, v33, v2, vcc
+; GFX7-NEXT:    s_waitcnt vmcnt(10)
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[35:36], v[35:36]
+; GFX7-NEXT:    v_min_f64 v[2:3], v[2:3], v[33:34]
+; GFX7-NEXT:    buffer_load_dword v34, off, s[0:3], s32
+; GFX7-NEXT:    v_cndmask_b32_e32 v5, v5, v36, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v4, v35, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v36, v36, v5, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v35, v35, v4, vcc
+; GFX7-NEXT:    s_waitcnt vmcnt(9)
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[37:38], v[37:38]
+; GFX7-NEXT:    v_min_f64 v[4:5], v[4:5], v[35:36]
+; GFX7-NEXT:    v_cndmask_b32_e32 v7, v7, v38, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v6, v6, v37, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v38, v38, v7, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v37, v37, v6, vcc
+; GFX7-NEXT:    s_waitcnt vmcnt(7)
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[48:49], v[48:49]
+; GFX7-NEXT:    v_min_f64 v[6:7], v[6:7], v[37:38]
+; GFX7-NEXT:    v_cndmask_b32_e32 v9, v9, v49, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v8, v8, v48, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e32 v49, v49, v9, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v48, v48, v8, vcc
+; GFX7-NEXT:    s_waitcnt vmcnt(5)
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[50:51], v[50:51]
+; GFX7-NEXT:    v_min_f64 v[8:9], v[8:9], v[48:49]
+; GFX7-NEXT:    v_cndmask_b32_e32 v11, v11, v51, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v10, v10, v50, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX7-NEXT:    v_cndmask_b32_e32 v51, v51, v11, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v50, v50, v10, vcc
+; GFX7-NEXT:    s_waitcnt vmcnt(3)
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[52:53], v[52:53]
+; GFX7-NEXT:    v_min_f64 v[10:11], v[10:11], v[50:51]
+; GFX7-NEXT:    v_cndmask_b32_e32 v13, v13, v53, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v12, v12, v52, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX7-NEXT:    v_cndmask_b32_e32 v53, v53, v13, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v52, v52, v12, vcc
+; GFX7-NEXT:    s_waitcnt vmcnt(1)
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX7-NEXT:    v_min_f64 v[12:13], v[12:13], v[52:53]
+; GFX7-NEXT:    v_cndmask_b32_e32 v15, v15, v32, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v14, v14, v31, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX7-NEXT:    v_cndmask_b32_e32 v32, v32, v15, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v31, v31, v14, vcc
 ; GFX7-NEXT:    v_min_f64 v[14:15], v[14:15], v[31:32]
-; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:68
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:72
-; GFX7-NEXT:    v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:68
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX7-NEXT:    v_cndmask_b32_e32 v17, v17, v32, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v16, v16, v31, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[16:17]
+; GFX7-NEXT:    v_cndmask_b32_e32 v32, v32, v17, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v31, v31, v16, vcc
 ; GFX7-NEXT:    v_min_f64 v[16:17], v[16:17], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:80
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:76
-; GFX7-NEXT:    v_cndmask_b32_e64 v16, v16, 0, s[18:19]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX7-NEXT:    v_cndmask_b32_e32 v19, v19, v32, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v18, v18, v31, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[18:19]
+; GFX7-NEXT:    v_cndmask_b32_e32 v32, v32, v19, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v31, v31, v18, vcc
 ; GFX7-NEXT:    v_min_f64 v[18:19], v[18:19], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:88
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX7-NEXT:    v_cndmask_b32_e64 v18, v18, 0, s[20:21]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX7-NEXT:    v_cndmask_b32_e32 v21, v21, v32, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v20, v20, v31, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[20:21]
+; GFX7-NEXT:    v_cndmask_b32_e32 v32, v32, v21, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v31, v31, v20, vcc
 ; GFX7-NEXT:    v_min_f64 v[20:21], v[20:21], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:96
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:92
-; GFX7-NEXT:    v_cndmask_b32_e64 v20, v20, 0, s[22:23]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX7-NEXT:    v_cndmask_b32_e32 v23, v23, v32, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v22, v22, v31, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[22:23]
+; GFX7-NEXT:    v_cndmask_b32_e32 v32, v32, v23, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v31, v31, v22, vcc
 ; GFX7-NEXT:    v_min_f64 v[22:23], v[22:23], v[31:32]
-; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:100
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:104
-; GFX7-NEXT:    v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:100
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX7-NEXT:    v_cndmask_b32_e32 v25, v25, v32, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v24, v24, v31, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[24:25]
+; GFX7-NEXT:    v_cndmask_b32_e32 v32, v32, v25, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v31, v31, v24, vcc
 ; GFX7-NEXT:    v_min_f64 v[24:25], v[24:25], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:112
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:108
-; GFX7-NEXT:    v_cndmask_b32_e64 v24, v24, 0, s[26:27]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX7-NEXT:    v_cndmask_b32_e32 v27, v27, v32, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v26, v26, v31, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[26:27]
+; GFX7-NEXT:    v_cndmask_b32_e32 v32, v32, v27, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v31, v31, v26, vcc
 ; GFX7-NEXT:    v_min_f64 v[26:27], v[26:27], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:120
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:116
-; GFX7-NEXT:    v_cndmask_b32_e64 v26, v26, 0, s[28:29]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX7-NEXT:    v_cndmask_b32_e32 v29, v29, v32, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v28, v28, v31, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[28:29]
+; GFX7-NEXT:    v_cndmask_b32_e32 v32, v32, v29, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v31, v31, v28, vcc
 ; GFX7-NEXT:    v_min_f64 v[28:29], v[28:29], v[31:32]
-; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX7-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:128
-; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX7-NEXT:    v_cndmask_b32_e64 v28, v28, 0, s[40:41]
+; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:128
+; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:124
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33]
-; GFX7-NEXT:    v_min_f64 v[30:31], v[30:31], v[32:33]
-; GFX7-NEXT:    v_mov_b32_e32 v32, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v32, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v3, v32, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v5, v32, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v7, v7, v32, s[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e64 v9, v9, v32, s[10:11]
-; GFX7-NEXT:    v_cndmask_b32_e64 v11, v11, v32, s[12:13]
-; GFX7-NEXT:    v_cndmask_b32_e64 v13, v13, v32, s[14:15]
-; GFX7-NEXT:    v_cndmask_b32_e64 v15, v15, v32, s[16:17]
-; GFX7-NEXT:    v_cndmask_b32_e64 v17, v17, v32, s[18:19]
-; GFX7-NEXT:    v_cndmask_b32_e64 v19, v19, v32, s[20:21]
-; GFX7-NEXT:    v_cndmask_b32_e64 v21, v21, v32, s[22:23]
-; GFX7-NEXT:    v_cndmask_b32_e64 v23, v23, v32, s[24:25]
-; GFX7-NEXT:    v_cndmask_b32_e64 v25, v25, v32, s[26:27]
-; GFX7-NEXT:    v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX7-NEXT:    v_cndmask_b32_e64 v29, v29, v32, s[40:41]
-; GFX7-NEXT:    v_cndmask_b32_e64 v31, v31, v32, s[42:43]
-; GFX7-NEXT:    v_cndmask_b32_e64 v30, v30, 0, s[42:43]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX7-NEXT:    v_cndmask_b32_e32 v33, v30, v31, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v34, v34, v32, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[33:34], v[33:34]
+; GFX7-NEXT:    v_cndmask_b32_e32 v32, v32, v34, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v31, v31, v33, vcc
+; GFX7-NEXT:    v_min_f64 v[30:31], v[33:34], v[31:32]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_v16f64:
@@ -2129,118 +2770,165 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:8
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX8-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:16
+; GFX8-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:12
+; GFX8-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:24
+; GFX8-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:20
+; GFX8-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:32
+; GFX8-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:28
+; GFX8-NEXT:    buffer_load_dword v49, off, s[0:3], s32 offset:40
+; GFX8-NEXT:    buffer_load_dword v48, off, s[0:3], s32 offset:36
+; GFX8-NEXT:    buffer_load_dword v51, off, s[0:3], s32 offset:48
+; GFX8-NEXT:    buffer_load_dword v50, off, s[0:3], s32 offset:44
+; GFX8-NEXT:    buffer_load_dword v53, off, s[0:3], s32 offset:56
+; GFX8-NEXT:    buffer_load_dword v52, off, s[0:3], s32 offset:52
+; GFX8-NEXT:    s_waitcnt vmcnt(12)
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v32, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v31, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v32, v32, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v31, v31, v0, vcc
+; GFX8-NEXT:    s_waitcnt vmcnt(10)
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[33:34], v[33:34]
 ; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[31:32]
-; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:16
-; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:12
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
-; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
-; GFX8-NEXT:    v_min_f64 v[2:3], v[2:3], v[31:32]
-; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:24
-; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:20
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, 0, s[4:5]
-; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
-; GFX8-NEXT:    v_min_f64 v[4:5], v[4:5], v[31:32]
-; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:32
-; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:28
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v4, 0, s[6:7]
-; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
-; GFX8-NEXT:    v_min_f64 v[6:7], v[6:7], v[31:32]
-; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:36
-; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:40
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, v6, 0, s[8:9]
-; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
-; GFX8-NEXT:    v_min_f64 v[8:9], v[8:9], v[31:32]
-; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:48
-; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:44
-; GFX8-NEXT:    v_cndmask_b32_e64 v8, v8, 0, s[10:11]
-; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
-; GFX8-NEXT:    v_min_f64 v[10:11], v[10:11], v[31:32]
-; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:56
-; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX8-NEXT:    v_cndmask_b32_e64 v10, v10, 0, s[12:13]
-; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
-; GFX8-NEXT:    v_min_f64 v[12:13], v[12:13], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:64
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:60
-; GFX8-NEXT:    v_cndmask_b32_e64 v12, v12, 0, s[14:15]
-; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v34, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v33, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e32 v34, v34, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v33, v33, v2, vcc
+; GFX8-NEXT:    s_waitcnt vmcnt(10)
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[35:36], v[35:36]
+; GFX8-NEXT:    v_min_f64 v[2:3], v[2:3], v[33:34]
+; GFX8-NEXT:    buffer_load_dword v34, off, s[0:3], s32
+; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v36, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v35, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v36, v36, v5, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v35, v35, v4, vcc
+; GFX8-NEXT:    s_waitcnt vmcnt(9)
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[37:38], v[37:38]
+; GFX8-NEXT:    v_min_f64 v[4:5], v[4:5], v[35:36]
+; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v38, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v37, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v38, v38, v7, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v37, v37, v6, vcc
+; GFX8-NEXT:    s_waitcnt vmcnt(7)
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[48:49], v[48:49]
+; GFX8-NEXT:    v_min_f64 v[6:7], v[6:7], v[37:38]
+; GFX8-NEXT:    v_cndmask_b32_e32 v9, v9, v49, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v8, v48, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e32 v49, v49, v9, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v48, v48, v8, vcc
+; GFX8-NEXT:    s_waitcnt vmcnt(5)
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[50:51], v[50:51]
+; GFX8-NEXT:    v_min_f64 v[8:9], v[8:9], v[48:49]
+; GFX8-NEXT:    v_cndmask_b32_e32 v11, v11, v51, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v10, v10, v50, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e32 v51, v51, v11, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v50, v50, v10, vcc
+; GFX8-NEXT:    s_waitcnt vmcnt(3)
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[52:53], v[52:53]
+; GFX8-NEXT:    v_min_f64 v[10:11], v[10:11], v[50:51]
+; GFX8-NEXT:    v_cndmask_b32_e32 v13, v13, v53, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v12, v12, v52, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX8-NEXT:    v_cndmask_b32_e32 v53, v53, v13, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v52, v52, v12, vcc
+; GFX8-NEXT:    s_waitcnt vmcnt(1)
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX8-NEXT:    v_min_f64 v[12:13], v[12:13], v[52:53]
+; GFX8-NEXT:    v_cndmask_b32_e32 v15, v15, v32, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v14, v14, v31, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX8-NEXT:    v_cndmask_b32_e32 v32, v32, v15, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v31, v31, v14, vcc
 ; GFX8-NEXT:    v_min_f64 v[14:15], v[14:15], v[31:32]
-; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:68
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:72
-; GFX8-NEXT:    v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:68
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX8-NEXT:    v_cndmask_b32_e32 v17, v17, v32, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v16, v16, v31, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[16:17]
+; GFX8-NEXT:    v_cndmask_b32_e32 v32, v32, v17, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v31, v31, v16, vcc
 ; GFX8-NEXT:    v_min_f64 v[16:17], v[16:17], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:80
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:76
-; GFX8-NEXT:    v_cndmask_b32_e64 v16, v16, 0, s[18:19]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX8-NEXT:    v_cndmask_b32_e32 v19, v19, v32, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v18, v18, v31, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[18:19]
+; GFX8-NEXT:    v_cndmask_b32_e32 v32, v32, v19, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v31, v31, v18, vcc
 ; GFX8-NEXT:    v_min_f64 v[18:19], v[18:19], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:88
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX8-NEXT:    v_cndmask_b32_e64 v18, v18, 0, s[20:21]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX8-NEXT:    v_cndmask_b32_e32 v21, v21, v32, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v20, v20, v31, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[20:21]
+; GFX8-NEXT:    v_cndmask_b32_e32 v32, v32, v21, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v31, v31, v20, vcc
 ; GFX8-NEXT:    v_min_f64 v[20:21], v[20:21], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:96
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:92
-; GFX8-NEXT:    v_cndmask_b32_e64 v20, v20, 0, s[22:23]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX8-NEXT:    v_cndmask_b32_e32 v23, v23, v32, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v22, v22, v31, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[22:23]
+; GFX8-NEXT:    v_cndmask_b32_e32 v32, v32, v23, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v31, v31, v22, vcc
 ; GFX8-NEXT:    v_min_f64 v[22:23], v[22:23], v[31:32]
-; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:100
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:104
-; GFX8-NEXT:    v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:100
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX8-NEXT:    v_cndmask_b32_e32 v25, v25, v32, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v24, v24, v31, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[24:25]
+; GFX8-NEXT:    v_cndmask_b32_e32 v32, v32, v25, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v31, v31, v24, vcc
 ; GFX8-NEXT:    v_min_f64 v[24:25], v[24:25], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:112
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:108
-; GFX8-NEXT:    v_cndmask_b32_e64 v24, v24, 0, s[26:27]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX8-NEXT:    v_cndmask_b32_e32 v27, v27, v32, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v26, v26, v31, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[26:27]
+; GFX8-NEXT:    v_cndmask_b32_e32 v32, v32, v27, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v31, v31, v26, vcc
 ; GFX8-NEXT:    v_min_f64 v[26:27], v[26:27], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:120
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:116
-; GFX8-NEXT:    v_cndmask_b32_e64 v26, v26, 0, s[28:29]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX8-NEXT:    v_cndmask_b32_e32 v29, v29, v32, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v28, v28, v31, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[28:29]
+; GFX8-NEXT:    v_cndmask_b32_e32 v32, v32, v29, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v31, v31, v28, vcc
 ; GFX8-NEXT:    v_min_f64 v[28:29], v[28:29], v[31:32]
-; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX8-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:128
-; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX8-NEXT:    v_cndmask_b32_e64 v28, v28, 0, s[40:41]
+; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:128
+; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:124
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33]
-; GFX8-NEXT:    v_min_f64 v[30:31], v[30:31], v[32:33]
-; GFX8-NEXT:    v_mov_b32_e32 v32, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v32, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v32, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v5, v32, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, v7, v32, s[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e64 v9, v9, v32, s[10:11]
-; GFX8-NEXT:    v_cndmask_b32_e64 v11, v11, v32, s[12:13]
-; GFX8-NEXT:    v_cndmask_b32_e64 v13, v13, v32, s[14:15]
-; GFX8-NEXT:    v_cndmask_b32_e64 v15, v15, v32, s[16:17]
-; GFX8-NEXT:    v_cndmask_b32_e64 v17, v17, v32, s[18:19]
-; GFX8-NEXT:    v_cndmask_b32_e64 v19, v19, v32, s[20:21]
-; GFX8-NEXT:    v_cndmask_b32_e64 v21, v21, v32, s[22:23]
-; GFX8-NEXT:    v_cndmask_b32_e64 v23, v23, v32, s[24:25]
-; GFX8-NEXT:    v_cndmask_b32_e64 v25, v25, v32, s[26:27]
-; GFX8-NEXT:    v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX8-NEXT:    v_cndmask_b32_e64 v29, v29, v32, s[40:41]
-; GFX8-NEXT:    v_cndmask_b32_e64 v31, v31, v32, s[42:43]
-; GFX8-NEXT:    v_cndmask_b32_e64 v30, v30, 0, s[42:43]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX8-NEXT:    v_cndmask_b32_e32 v33, v30, v31, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v34, v34, v32, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[33:34], v[33:34]
+; GFX8-NEXT:    v_cndmask_b32_e32 v32, v32, v34, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v31, v31, v33, vcc
+; GFX8-NEXT:    v_min_f64 v[30:31], v[33:34], v[31:32]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v16f64:
@@ -2248,118 +2936,165 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:8
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX900-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:16
+; GFX900-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:12
+; GFX900-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:24
+; GFX900-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:20
+; GFX900-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:32
+; GFX900-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:28
+; GFX900-NEXT:    buffer_load_dword v49, off, s[0:3], s32 offset:40
+; GFX900-NEXT:    buffer_load_dword v48, off, s[0:3], s32 offset:36
+; GFX900-NEXT:    buffer_load_dword v51, off, s[0:3], s32 offset:48
+; GFX900-NEXT:    buffer_load_dword v50, off, s[0:3], s32 offset:44
+; GFX900-NEXT:    buffer_load_dword v53, off, s[0:3], s32 offset:56
+; GFX900-NEXT:    buffer_load_dword v52, off, s[0:3], s32 offset:52
+; GFX900-NEXT:    s_waitcnt vmcnt(12)
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v32, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v31, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX900-NEXT:    v_cndmask_b32_e32 v32, v32, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v31, v31, v0, vcc
+; GFX900-NEXT:    s_waitcnt vmcnt(10)
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[33:34], v[33:34]
 ; GFX900-NEXT:    v_min_f64 v[0:1], v[0:1], v[31:32]
-; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:16
-; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:12
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
-; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
-; GFX900-NEXT:    v_min_f64 v[2:3], v[2:3], v[31:32]
-; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:24
-; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:20
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v2, 0, s[4:5]
-; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
-; GFX900-NEXT:    v_min_f64 v[4:5], v[4:5], v[31:32]
-; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:32
-; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:28
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v4, 0, s[6:7]
-; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
-; GFX900-NEXT:    v_min_f64 v[6:7], v[6:7], v[31:32]
-; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:36
-; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:40
-; GFX900-NEXT:    v_cndmask_b32_e64 v6, v6, 0, s[8:9]
-; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
-; GFX900-NEXT:    v_min_f64 v[8:9], v[8:9], v[31:32]
-; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:48
-; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:44
-; GFX900-NEXT:    v_cndmask_b32_e64 v8, v8, 0, s[10:11]
-; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
-; GFX900-NEXT:    v_min_f64 v[10:11], v[10:11], v[31:32]
-; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:56
-; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX900-NEXT:    v_cndmask_b32_e64 v10, v10, 0, s[12:13]
-; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
-; GFX900-NEXT:    v_min_f64 v[12:13], v[12:13], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:64
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:60
-; GFX900-NEXT:    v_cndmask_b32_e64 v12, v12, 0, s[14:15]
-; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX900-NEXT:    v_cndmask_b32_e32 v3, v3, v34, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v2, v33, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX900-NEXT:    v_cndmask_b32_e32 v34, v34, v3, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v33, v33, v2, vcc
+; GFX900-NEXT:    s_waitcnt vmcnt(10)
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[35:36], v[35:36]
+; GFX900-NEXT:    v_min_f64 v[2:3], v[2:3], v[33:34]
+; GFX900-NEXT:    buffer_load_dword v34, off, s[0:3], s32
+; GFX900-NEXT:    v_cndmask_b32_e32 v5, v5, v36, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v4, v4, v35, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e32 v36, v36, v5, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v35, v35, v4, vcc
+; GFX900-NEXT:    s_waitcnt vmcnt(9)
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[37:38], v[37:38]
+; GFX900-NEXT:    v_min_f64 v[4:5], v[4:5], v[35:36]
+; GFX900-NEXT:    v_cndmask_b32_e32 v7, v7, v38, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v6, v6, v37, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e32 v38, v38, v7, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v37, v37, v6, vcc
+; GFX900-NEXT:    s_waitcnt vmcnt(7)
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[48:49], v[48:49]
+; GFX900-NEXT:    v_min_f64 v[6:7], v[6:7], v[37:38]
+; GFX900-NEXT:    v_cndmask_b32_e32 v9, v9, v49, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v8, v8, v48, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e32 v49, v49, v9, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v48, v48, v8, vcc
+; GFX900-NEXT:    s_waitcnt vmcnt(5)
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[50:51], v[50:51]
+; GFX900-NEXT:    v_min_f64 v[8:9], v[8:9], v[48:49]
+; GFX900-NEXT:    v_cndmask_b32_e32 v11, v11, v51, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v10, v10, v50, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX900-NEXT:    v_cndmask_b32_e32 v51, v51, v11, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v50, v50, v10, vcc
+; GFX900-NEXT:    s_waitcnt vmcnt(3)
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[52:53], v[52:53]
+; GFX900-NEXT:    v_min_f64 v[10:11], v[10:11], v[50:51]
+; GFX900-NEXT:    v_cndmask_b32_e32 v13, v13, v53, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v12, v12, v52, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX900-NEXT:    v_cndmask_b32_e32 v53, v53, v13, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v52, v52, v12, vcc
+; GFX900-NEXT:    s_waitcnt vmcnt(1)
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX900-NEXT:    v_min_f64 v[12:13], v[12:13], v[52:53]
+; GFX900-NEXT:    v_cndmask_b32_e32 v15, v15, v32, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v14, v14, v31, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX900-NEXT:    v_cndmask_b32_e32 v32, v32, v15, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v31, v31, v14, vcc
 ; GFX900-NEXT:    v_min_f64 v[14:15], v[14:15], v[31:32]
-; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:68
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:72
-; GFX900-NEXT:    v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:68
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX900-NEXT:    v_cndmask_b32_e32 v17, v17, v32, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v16, v16, v31, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[16:17]
+; GFX900-NEXT:    v_cndmask_b32_e32 v32, v32, v17, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v31, v31, v16, vcc
 ; GFX900-NEXT:    v_min_f64 v[16:17], v[16:17], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:80
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:76
-; GFX900-NEXT:    v_cndmask_b32_e64 v16, v16, 0, s[18:19]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX900-NEXT:    v_cndmask_b32_e32 v19, v19, v32, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v18, v18, v31, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[18:19]
+; GFX900-NEXT:    v_cndmask_b32_e32 v32, v32, v19, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v31, v31, v18, vcc
 ; GFX900-NEXT:    v_min_f64 v[18:19], v[18:19], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:88
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX900-NEXT:    v_cndmask_b32_e64 v18, v18, 0, s[20:21]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX900-NEXT:    v_cndmask_b32_e32 v21, v21, v32, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v20, v20, v31, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[20:21]
+; GFX900-NEXT:    v_cndmask_b32_e32 v32, v32, v21, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v31, v31, v20, vcc
 ; GFX900-NEXT:    v_min_f64 v[20:21], v[20:21], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:96
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:92
-; GFX900-NEXT:    v_cndmask_b32_e64 v20, v20, 0, s[22:23]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX900-NEXT:    v_cndmask_b32_e32 v23, v23, v32, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v22, v22, v31, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[22:23]
+; GFX900-NEXT:    v_cndmask_b32_e32 v32, v32, v23, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v31, v31, v22, vcc
 ; GFX900-NEXT:    v_min_f64 v[22:23], v[22:23], v[31:32]
-; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:100
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:104
-; GFX900-NEXT:    v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:100
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX900-NEXT:    v_cndmask_b32_e32 v25, v25, v32, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v24, v24, v31, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[24:25]
+; GFX900-NEXT:    v_cndmask_b32_e32 v32, v32, v25, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v31, v31, v24, vcc
 ; GFX900-NEXT:    v_min_f64 v[24:25], v[24:25], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:112
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:108
-; GFX900-NEXT:    v_cndmask_b32_e64 v24, v24, 0, s[26:27]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX900-NEXT:    v_cndmask_b32_e32 v27, v27, v32, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v26, v26, v31, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[26:27]
+; GFX900-NEXT:    v_cndmask_b32_e32 v32, v32, v27, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v31, v31, v26, vcc
 ; GFX900-NEXT:    v_min_f64 v[26:27], v[26:27], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:120
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:116
-; GFX900-NEXT:    v_cndmask_b32_e64 v26, v26, 0, s[28:29]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX900-NEXT:    v_cndmask_b32_e32 v29, v29, v32, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v28, v28, v31, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[28:29]
+; GFX900-NEXT:    v_cndmask_b32_e32 v32, v32, v29, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v31, v31, v28, vcc
 ; GFX900-NEXT:    v_min_f64 v[28:29], v[28:29], v[31:32]
-; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX900-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:128
-; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX900-NEXT:    v_cndmask_b32_e64 v28, v28, 0, s[40:41]
+; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:128
+; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:124
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33]
-; GFX900-NEXT:    v_min_f64 v[30:31], v[30:31], v[32:33]
-; GFX900-NEXT:    v_mov_b32_e32 v32, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v32, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v3, v32, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v5, v32, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v7, v7, v32, s[8:9]
-; GFX900-NEXT:    v_cndmask_b32_e64 v9, v9, v32, s[10:11]
-; GFX900-NEXT:    v_cndmask_b32_e64 v11, v11, v32, s[12:13]
-; GFX900-NEXT:    v_cndmask_b32_e64 v13, v13, v32, s[14:15]
-; GFX900-NEXT:    v_cndmask_b32_e64 v15, v15, v32, s[16:17]
-; GFX900-NEXT:    v_cndmask_b32_e64 v17, v17, v32, s[18:19]
-; GFX900-NEXT:    v_cndmask_b32_e64 v19, v19, v32, s[20:21]
-; GFX900-NEXT:    v_cndmask_b32_e64 v21, v21, v32, s[22:23]
-; GFX900-NEXT:    v_cndmask_b32_e64 v23, v23, v32, s[24:25]
-; GFX900-NEXT:    v_cndmask_b32_e64 v25, v25, v32, s[26:27]
-; GFX900-NEXT:    v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX900-NEXT:    v_cndmask_b32_e64 v29, v29, v32, s[40:41]
-; GFX900-NEXT:    v_cndmask_b32_e64 v31, v31, v32, s[42:43]
-; GFX900-NEXT:    v_cndmask_b32_e64 v30, v30, 0, s[42:43]
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[31:32], v[31:32]
+; GFX900-NEXT:    v_cndmask_b32_e32 v33, v30, v31, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v34, v34, v32, vcc
+; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[33:34], v[33:34]
+; GFX900-NEXT:    v_cndmask_b32_e32 v32, v32, v34, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v31, v31, v33, vcc
+; GFX900-NEXT:    v_min_f64 v[30:31], v[33:34], v[31:32]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v16f64:
@@ -2375,12 +3110,12 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX950-NEXT:    v_accvgpr_write_b32 a7, v47 ; Reload Reuse
 ; GFX950-NEXT:    v_accvgpr_write_b32 a8, v56 ; Reload Reuse
 ; GFX950-NEXT:    v_accvgpr_write_b32 a9, v57 ; Reload Reuse
-; GFX950-NEXT:    scratch_load_dword v35, off, s32 offset:8
-; GFX950-NEXT:    scratch_load_dword v34, off, s32 offset:4
-; GFX950-NEXT:    scratch_load_dword v37, off, s32 offset:16
-; GFX950-NEXT:    scratch_load_dword v36, off, s32 offset:12
-; GFX950-NEXT:    scratch_load_dword v39, off, s32 offset:24
-; GFX950-NEXT:    scratch_load_dword v38, off, s32 offset:20
+; GFX950-NEXT:    scratch_load_dword v51, off, s32 offset:8
+; GFX950-NEXT:    scratch_load_dword v50, off, s32 offset:4
+; GFX950-NEXT:    scratch_load_dword v39, off, s32 offset:16
+; GFX950-NEXT:    scratch_load_dword v38, off, s32 offset:12
+; GFX950-NEXT:    scratch_load_dword v35, off, s32 offset:24
+; GFX950-NEXT:    scratch_load_dword v34, off, s32 offset:20
 ; GFX950-NEXT:    scratch_load_dword v57, off, s32 offset:32
 ; GFX950-NEXT:    scratch_load_dword v56, off, s32 offset:28
 ; GFX950-NEXT:    scratch_load_dword v47, off, s32 offset:40
@@ -2395,270 +3130,375 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX950-NEXT:    scratch_load_dword v54, off, s32 offset:68
 ; GFX950-NEXT:    scratch_load_dword v53, off, s32 offset:80
 ; GFX950-NEXT:    scratch_load_dword v52, off, s32 offset:76
-; GFX950-NEXT:    scratch_load_dword v51, off, s32 offset:88
-; GFX950-NEXT:    scratch_load_dword v50, off, s32 offset:84
-; GFX950-NEXT:    scratch_load_dword v49, off, s32 offset:96
-; GFX950-NEXT:    scratch_load_dword v48, off, s32 offset:92
-; GFX950-NEXT:    scratch_load_dword v31, off, s32
 ; GFX950-NEXT:    scratch_load_dword v33, off, s32 offset:104
 ; GFX950-NEXT:    scratch_load_dword v32, off, s32 offset:100
+; GFX950-NEXT:    scratch_load_dword v37, off, s32 offset:96
+; GFX950-NEXT:    scratch_load_dword v49, off, s32 offset:88
+; GFX950-NEXT:    scratch_load_dword v48, off, s32 offset:84
+; GFX950-NEXT:    scratch_load_dword v36, off, s32 offset:92
 ; GFX950-NEXT:    v_accvgpr_write_b32 a10, v58 ; Reload Reuse
 ; GFX950-NEXT:    v_accvgpr_write_b32 a11, v59 ; Reload Reuse
 ; GFX950-NEXT:    v_accvgpr_write_b32 a12, v60 ; Reload Reuse
 ; GFX950-NEXT:    v_accvgpr_write_b32 a13, v61 ; Reload Reuse
-; GFX950-NEXT:    v_accvgpr_write_b32 a14, v62 ; Reload Reuse
-; GFX950-NEXT:    v_accvgpr_write_b32 a15, v63 ; Reload Reuse
-; GFX950-NEXT:    s_waitcnt vmcnt(25)
-; GFX950-NEXT:    v_min_f64 v[58:59], v[0:1], v[34:35]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[34:35]
-; GFX950-NEXT:    scratch_load_dword v35, off, s32 offset:112
-; GFX950-NEXT:    scratch_load_dword v34, off, s32 offset:108
-; GFX950-NEXT:    s_waitcnt vmcnt(25)
-; GFX950-NEXT:    v_min_f64 v[60:61], v[2:3], v[36:37]
-; GFX950-NEXT:    v_cmp_u_f64_e64 s[0:1], v[2:3], v[36:37]
-; GFX950-NEXT:    scratch_load_dword v37, off, s32 offset:120
-; GFX950-NEXT:    scratch_load_dword v36, off, s32 offset:116
-; GFX950-NEXT:    s_waitcnt vmcnt(25)
-; GFX950-NEXT:    v_min_f64 v[62:63], v[4:5], v[38:39]
-; GFX950-NEXT:    v_cmp_u_f64_e64 s[2:3], v[4:5], v[38:39]
-; GFX950-NEXT:    scratch_load_dword v39, off, s32 offset:128
-; GFX950-NEXT:    scratch_load_dword v38, off, s32 offset:124
-; GFX950-NEXT:    v_mov_b32_e32 v2, 0x7ff80000
+; GFX950-NEXT:    s_waitcnt vmcnt(24)
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[50:51], v[50:51]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v51, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v50, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX950-NEXT:    s_waitcnt vmcnt(22)
+; GFX950-NEXT:    v_cmp_u_f64_e64 s[0:1], v[38:39], v[38:39]
+; GFX950-NEXT:    s_waitcnt vmcnt(20)
+; GFX950-NEXT:    v_cmp_u_f64_e64 s[2:3], v[34:35], v[34:35]
+; GFX950-NEXT:    v_cndmask_b32_e32 v59, v51, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v58, v50, v0, vcc
+; GFX950-NEXT:    scratch_load_dword v51, off, s32 offset:112
+; GFX950-NEXT:    scratch_load_dword v50, off, s32 offset:108
+; GFX950-NEXT:    v_cndmask_b32_e64 v3, v3, v39, s[0:1]
+; GFX950-NEXT:    v_cndmask_b32_e64 v2, v2, v38, s[0:1]
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX950-NEXT:    v_cndmask_b32_e64 v5, v5, v35, s[2:3]
+; GFX950-NEXT:    v_cndmask_b32_e64 v4, v4, v34, s[2:3]
+; GFX950-NEXT:    v_cndmask_b32_e32 v39, v39, v3, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v38, v38, v2, vcc
+; GFX950-NEXT:    v_min_f64 v[2:3], v[2:3], v[38:39]
+; GFX950-NEXT:    scratch_load_dword v39, off, s32 offset:120
+; GFX950-NEXT:    scratch_load_dword v38, off, s32 offset:116
+; GFX950-NEXT:    scratch_load_dword v31, off, s32
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX950-NEXT:    v_min_f64 v[0:1], v[0:1], v[58:59]
+; GFX950-NEXT:    v_accvgpr_read_b32 v59, a11 ; Reload Reuse
+; GFX950-NEXT:    v_cndmask_b32_e32 v61, v35, v5, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v60, v34, v4, vcc
+; GFX950-NEXT:    scratch_load_dword v35, off, s32 offset:128
+; GFX950-NEXT:    scratch_load_dword v34, off, s32 offset:124
 ; GFX950-NEXT:    s_waitcnt vmcnt(25)
-; GFX950-NEXT:    v_min_f64 v[0:1], v[6:7], v[56:57]
-; GFX950-NEXT:    v_cmp_u_f64_e64 s[4:5], v[6:7], v[56:57]
-; GFX950-NEXT:    s_waitcnt vmcnt(23)
-; GFX950-NEXT:    v_min_f64 v[56:57], v[8:9], v[46:47]
-; GFX950-NEXT:    v_cndmask_b32_e64 v58, v58, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v59, v59, v2, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[46:47]
-; GFX950-NEXT:    v_cndmask_b32_e64 v6, v0, 0, s[4:5]
-; GFX950-NEXT:    v_cndmask_b32_e64 v7, v1, v2, s[4:5]
-; GFX950-NEXT:    v_cndmask_b32_e64 v8, v56, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v9, v57, v2, vcc
-; GFX950-NEXT:    s_waitcnt vmcnt(21)
-; GFX950-NEXT:    v_min_f64 v[0:1], v[10:11], v[44:45]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[44:45]
-; GFX950-NEXT:    v_cndmask_b32_e64 v60, v60, 0, s[0:1]
-; GFX950-NEXT:    v_cndmask_b32_e64 v3, v61, v2, s[0:1]
-; GFX950-NEXT:    v_cndmask_b32_e64 v10, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v11, v1, v2, vcc
-; GFX950-NEXT:    s_waitcnt vmcnt(19)
-; GFX950-NEXT:    v_min_f64 v[0:1], v[12:13], v[42:43]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[42:43]
-; GFX950-NEXT:    v_cndmask_b32_e64 v4, v62, 0, s[2:3]
-; GFX950-NEXT:    v_cndmask_b32_e64 v5, v63, v2, s[2:3]
-; GFX950-NEXT:    v_cndmask_b32_e64 v12, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v13, v1, v2, vcc
-; GFX950-NEXT:    s_waitcnt vmcnt(17)
-; GFX950-NEXT:    v_min_f64 v[0:1], v[14:15], v[40:41]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
-; GFX950-NEXT:    v_accvgpr_read_b32 v63, a15 ; Reload Reuse
-; GFX950-NEXT:    v_accvgpr_read_b32 v62, a14 ; Reload Reuse
-; GFX950-NEXT:    v_cndmask_b32_e64 v14, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v15, v1, v2, vcc
-; GFX950-NEXT:    s_waitcnt vmcnt(15)
-; GFX950-NEXT:    v_min_f64 v[0:1], v[16:17], v[54:55]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[54:55]
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[56:57], v[56:57]
+; GFX950-NEXT:    v_min_f64 v[4:5], v[4:5], v[60:61]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v61, a13 ; Reload Reuse
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v7, v57, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, v6, v56, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX950-NEXT:    v_accvgpr_read_b32 v60, a12 ; Reload Reuse
+; GFX950-NEXT:    v_accvgpr_read_b32 v58, a10 ; Reload Reuse
+; GFX950-NEXT:    v_cndmask_b32_e32 v57, v57, v7, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v56, v56, v6, vcc
+; GFX950-NEXT:    s_waitcnt vmcnt(23)
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[46:47], v[46:47]
+; GFX950-NEXT:    v_min_f64 v[6:7], v[6:7], v[56:57]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v57, a9 ; Reload Reuse
-; GFX950-NEXT:    v_cndmask_b32_e64 v16, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v17, v1, v2, vcc
-; GFX950-NEXT:    s_waitcnt vmcnt(13)
-; GFX950-NEXT:    v_min_f64 v[0:1], v[18:19], v[52:53]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[52:53]
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v9, v47, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, v8, v46, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v56, a8 ; Reload Reuse
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v47, v47, v9, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v46, v46, v8, vcc
+; GFX950-NEXT:    s_waitcnt vmcnt(21)
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[44:45], v[44:45]
+; GFX950-NEXT:    v_min_f64 v[8:9], v[8:9], v[46:47]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v47, a7 ; Reload Reuse
-; GFX950-NEXT:    v_cndmask_b32_e64 v18, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v19, v1, v2, vcc
-; GFX950-NEXT:    s_waitcnt vmcnt(11)
-; GFX950-NEXT:    v_min_f64 v[0:1], v[20:21], v[50:51]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[50:51]
+; GFX950-NEXT:    v_cndmask_b32_e32 v11, v11, v45, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v10, v10, v44, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v46, a6 ; Reload Reuse
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v45, v45, v11, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v44, v44, v10, vcc
+; GFX950-NEXT:    s_waitcnt vmcnt(19)
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[42:43], v[42:43]
+; GFX950-NEXT:    v_min_f64 v[10:11], v[10:11], v[44:45]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v45, a5 ; Reload Reuse
-; GFX950-NEXT:    v_cndmask_b32_e64 v20, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v21, v1, v2, vcc
-; GFX950-NEXT:    s_waitcnt vmcnt(9)
-; GFX950-NEXT:    v_min_f64 v[0:1], v[22:23], v[48:49]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[48:49]
+; GFX950-NEXT:    v_cndmask_b32_e32 v13, v13, v43, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v12, v12, v42, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v44, a4 ; Reload Reuse
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v43, v43, v13, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v42, v42, v12, vcc
+; GFX950-NEXT:    s_waitcnt vmcnt(17)
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[40:41], v[40:41]
+; GFX950-NEXT:    v_min_f64 v[12:13], v[12:13], v[42:43]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v43, a3 ; Reload Reuse
-; GFX950-NEXT:    v_cndmask_b32_e64 v22, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v23, v1, v2, vcc
-; GFX950-NEXT:    s_waitcnt vmcnt(6)
-; GFX950-NEXT:    v_min_f64 v[0:1], v[24:25], v[32:33]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[32:33]
+; GFX950-NEXT:    v_cndmask_b32_e32 v15, v15, v41, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v14, v14, v40, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v42, a2 ; Reload Reuse
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v41, v41, v15, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v40, v40, v14, vcc
+; GFX950-NEXT:    s_waitcnt vmcnt(15)
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[54:55], v[54:55]
+; GFX950-NEXT:    v_min_f64 v[14:15], v[14:15], v[40:41]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v41, a1 ; Reload Reuse
-; GFX950-NEXT:    v_cndmask_b32_e64 v24, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v25, v1, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v17, v17, v55, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v16, v16, v54, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[16:17]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v40, a0 ; Reload Reuse
-; GFX950-NEXT:    s_waitcnt vmcnt(4)
-; GFX950-NEXT:    v_min_f64 v[0:1], v[26:27], v[34:35]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[34:35]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v55, v55, v17, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v54, v54, v16, vcc
+; GFX950-NEXT:    s_waitcnt vmcnt(13)
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[52:53], v[52:53]
+; GFX950-NEXT:    v_min_f64 v[16:17], v[16:17], v[54:55]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v19, v19, v53, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v18, v18, v52, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[18:19]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v53, v53, v19, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v52, v52, v18, vcc
+; GFX950-NEXT:    s_waitcnt vmcnt(8)
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[48:49], v[48:49]
+; GFX950-NEXT:    v_min_f64 v[18:19], v[18:19], v[52:53]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v21, v21, v49, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v20, v20, v48, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[20:21]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v49, v49, v21, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v48, v48, v20, vcc
+; GFX950-NEXT:    s_waitcnt vmcnt(7)
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[36:37], v[36:37]
+; GFX950-NEXT:    v_min_f64 v[20:21], v[20:21], v[48:49]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v23, v23, v37, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v22, v22, v36, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[22:23]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v37, v37, v23, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v36, v36, v22, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[32:33], v[32:33]
+; GFX950-NEXT:    v_min_f64 v[22:23], v[22:23], v[36:37]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v25, v25, v33, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v24, v24, v32, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[24:25]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v33, v33, v25, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v32, v32, v24, vcc
+; GFX950-NEXT:    s_waitcnt vmcnt(5)
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[50:51], v[50:51]
+; GFX950-NEXT:    v_min_f64 v[24:25], v[24:25], v[32:33]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v27, v27, v51, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v26, v26, v50, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[26:27]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v26, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v27, v1, v2, vcc
-; GFX950-NEXT:    s_waitcnt vmcnt(2)
-; GFX950-NEXT:    v_min_f64 v[0:1], v[28:29], v[36:37]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[36:37]
+; GFX950-NEXT:    v_cndmask_b32_e32 v33, v51, v27, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v32, v50, v26, vcc
+; GFX950-NEXT:    s_waitcnt vmcnt(3)
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[38:39], v[38:39]
+; GFX950-NEXT:    v_min_f64 v[26:27], v[26:27], v[32:33]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v29, v29, v39, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v28, v28, v38, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[28:29]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v28, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v29, v1, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v33, v39, v29, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v32, v38, v28, vcc
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
-; GFX950-NEXT:    v_min_f64 v[0:1], v[30:31], v[38:39]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[30:31], v[38:39]
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[34:35], v[34:35]
+; GFX950-NEXT:    v_min_f64 v[28:29], v[28:29], v[32:33]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v30, v30, v34, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v31, v31, v35, vcc
+; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[30:31], v[30:31]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v30, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v31, v1, v2, vcc
-; GFX950-NEXT:    v_mov_b32_e32 v0, v58
-; GFX950-NEXT:    v_mov_b32_e32 v1, v59
-; GFX950-NEXT:    v_mov_b32_e32 v2, v60
-; GFX950-NEXT:    v_accvgpr_read_b32 v60, a12 ; Reload Reuse
-; GFX950-NEXT:    v_accvgpr_read_b32 v59, a11 ; Reload Reuse
-; GFX950-NEXT:    v_accvgpr_read_b32 v58, a10 ; Reload Reuse
+; GFX950-NEXT:    v_cndmask_b32_e32 v33, v35, v31, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v32, v34, v30, vcc
+; GFX950-NEXT:    v_min_f64 v[30:31], v[30:31], v[32:33]
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v16f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    s_clause 0x18
-; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4
+; GFX10-NEXT:    s_clause 0x1c
 ; GFX10-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:8
+; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4
 ; GFX10-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:16
 ; GFX10-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:12
 ; GFX10-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:24
 ; GFX10-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:20
-; GFX10-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:36
-; GFX10-NEXT:    buffer_load_dword v49, off, s[0:3], s32 offset:32
-; GFX10-NEXT:    buffer_load_dword v48, off, s[0:3], s32 offset:28
-; GFX10-NEXT:    buffer_load_dword v50, off, s[0:3], s32 offset:68
-; GFX10-NEXT:    buffer_load_dword v53, off, s[0:3], s32 offset:64
-; GFX10-NEXT:    buffer_load_dword v52, off, s[0:3], s32 offset:60
-; GFX10-NEXT:    buffer_load_dword v55, off, s[0:3], s32 offset:56
-; GFX10-NEXT:    buffer_load_dword v54, off, s[0:3], s32 offset:52
-; GFX10-NEXT:    buffer_load_dword v65, off, s[0:3], s32 offset:48
-; GFX10-NEXT:    buffer_load_dword v64, off, s[0:3], s32 offset:44
-; GFX10-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:40
-; GFX10-NEXT:    buffer_load_dword v66, off, s[0:3], s32 offset:100
-; GFX10-NEXT:    buffer_load_dword v69, off, s[0:3], s32 offset:96
-; GFX10-NEXT:    buffer_load_dword v68, off, s[0:3], s32 offset:92
-; GFX10-NEXT:    buffer_load_dword v71, off, s[0:3], s32 offset:88
-; GFX10-NEXT:    buffer_load_dword v70, off, s[0:3], s32 offset:84
-; GFX10-NEXT:    buffer_load_dword v81, off, s[0:3], s32 offset:80
-; GFX10-NEXT:    buffer_load_dword v80, off, s[0:3], s32 offset:76
-; GFX10-NEXT:    buffer_load_dword v51, off, s[0:3], s32 offset:72
+; GFX10-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:32
+; GFX10-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:28
+; GFX10-NEXT:    buffer_load_dword v49, off, s[0:3], s32 offset:40
+; GFX10-NEXT:    buffer_load_dword v48, off, s[0:3], s32 offset:36
+; GFX10-NEXT:    buffer_load_dword v51, off, s[0:3], s32 offset:48
+; GFX10-NEXT:    buffer_load_dword v50, off, s[0:3], s32 offset:44
+; GFX10-NEXT:    buffer_load_dword v53, off, s[0:3], s32 offset:56
+; GFX10-NEXT:    buffer_load_dword v52, off, s[0:3], s32 offset:52
+; GFX10-NEXT:    buffer_load_dword v55, off, s[0:3], s32 offset:64
+; GFX10-NEXT:    buffer_load_dword v54, off, s[0:3], s32 offset:60
+; GFX10-NEXT:    buffer_load_dword v65, off, s[0:3], s32 offset:72
+; GFX10-NEXT:    buffer_load_dword v64, off, s[0:3], s32 offset:68
+; GFX10-NEXT:    buffer_load_dword v67, off, s[0:3], s32 offset:80
+; GFX10-NEXT:    buffer_load_dword v66, off, s[0:3], s32 offset:76
+; GFX10-NEXT:    buffer_load_dword v69, off, s[0:3], s32 offset:88
+; GFX10-NEXT:    buffer_load_dword v68, off, s[0:3], s32 offset:84
+; GFX10-NEXT:    buffer_load_dword v71, off, s[0:3], s32 offset:96
+; GFX10-NEXT:    buffer_load_dword v70, off, s[0:3], s32 offset:92
+; GFX10-NEXT:    buffer_load_dword v81, off, s[0:3], s32 offset:104
+; GFX10-NEXT:    buffer_load_dword v80, off, s[0:3], s32 offset:100
+; GFX10-NEXT:    buffer_load_dword v83, off, s[0:3], s32 offset:112
+; GFX10-NEXT:    buffer_load_dword v82, off, s[0:3], s32 offset:108
+; GFX10-NEXT:    buffer_load_dword v39, off, s[0:3], s32
+; GFX10-NEXT:    s_waitcnt vmcnt(27)
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[31:32], v[31:32]
+; GFX10-NEXT:    s_waitcnt vmcnt(25)
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[33:34], v[33:34]
 ; GFX10-NEXT:    s_waitcnt vmcnt(23)
-; GFX10-NEXT:    v_min_f64 v[82:83], v[0:1], v[31:32]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[31:32]
-; GFX10-NEXT:    s_waitcnt vmcnt(21)
-; GFX10-NEXT:    v_min_f64 v[84:85], v[2:3], v[33:34]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[33:34]
-; GFX10-NEXT:    s_waitcnt vmcnt(19)
-; GFX10-NEXT:    v_min_f64 v[32:33], v[4:5], v[35:36]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[35:36]
-; GFX10-NEXT:    s_clause 0x7
-; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:112
-; GFX10-NEXT:    buffer_load_dword v67, off, s[0:3], s32 offset:104
-; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:108
-; GFX10-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:120
-; GFX10-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:116
-; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX10-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:128
-; GFX10-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:124
-; GFX10-NEXT:    s_waitcnt vmcnt(24)
-; GFX10-NEXT:    v_min_f64 v[34:35], v[6:7], v[48:49]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[6:7], v[48:49]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[35:36], v[35:36]
 ; GFX10-NEXT:    s_waitcnt vmcnt(21)
-; GFX10-NEXT:    v_cmp_u_f64_e64 s10, v[14:15], v[52:53]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[37:38], v[37:38]
 ; GFX10-NEXT:    s_waitcnt vmcnt(19)
-; GFX10-NEXT:    v_cmp_u_f64_e64 s9, v[12:13], v[54:55]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s7, v[48:49], v[48:49]
 ; GFX10-NEXT:    s_waitcnt vmcnt(17)
-; GFX10-NEXT:    v_cmp_u_f64_e64 s8, v[10:11], v[64:65]
-; GFX10-NEXT:    s_waitcnt vmcnt(16)
-; GFX10-NEXT:    v_min_f64 v[48:49], v[8:9], v[37:38]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s7, v[8:9], v[37:38]
-; GFX10-NEXT:    v_min_f64 v[36:37], v[10:11], v[64:65]
-; GFX10-NEXT:    v_min_f64 v[38:39], v[12:13], v[54:55]
-; GFX10-NEXT:    v_min_f64 v[54:55], v[14:15], v[52:53]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s8, v[50:51], v[50:51]
+; GFX10-NEXT:    s_waitcnt vmcnt(15)
+; GFX10-NEXT:    v_cmp_u_f64_e64 s9, v[52:53], v[52:53]
+; GFX10-NEXT:    s_waitcnt vmcnt(13)
+; GFX10-NEXT:    v_cmp_u_f64_e64 s10, v[54:55], v[54:55]
 ; GFX10-NEXT:    s_waitcnt vmcnt(11)
-; GFX10-NEXT:    v_min_f64 v[64:65], v[20:21], v[70:71]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s13, v[20:21], v[70:71]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s11, v[64:65], v[64:65]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v32, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v31, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v34, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v33, s4
 ; GFX10-NEXT:    s_waitcnt vmcnt(9)
-; GFX10-NEXT:    v_cmp_u_f64_e64 s12, v[18:19], v[80:81]
-; GFX10-NEXT:    s_waitcnt vmcnt(8)
-; GFX10-NEXT:    v_min_f64 v[52:53], v[16:17], v[50:51]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s11, v[16:17], v[50:51]
-; GFX10-NEXT:    v_min_f64 v[50:51], v[18:19], v[80:81]
-; GFX10-NEXT:    v_min_f64 v[70:71], v[22:23], v[68:69]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s14, v[22:23], v[68:69]
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, v34, 0, s6
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, v35, 0x7ff80000, s6
-; GFX10-NEXT:    v_cndmask_b32_e64 v8, v48, 0, s7
-; GFX10-NEXT:    v_cndmask_b32_e64 v9, v49, 0x7ff80000, s7
-; GFX10-NEXT:    v_cndmask_b32_e64 v10, v36, 0, s8
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, v37, 0x7ff80000, s8
-; GFX10-NEXT:    v_cndmask_b32_e64 v12, v38, 0, s9
-; GFX10-NEXT:    v_cndmask_b32_e64 v13, v39, 0x7ff80000, s9
-; GFX10-NEXT:    v_cndmask_b32_e64 v14, v54, 0, s10
-; GFX10-NEXT:    v_cndmask_b32_e64 v15, v55, 0x7ff80000, s10
-; GFX10-NEXT:    v_cndmask_b32_e64 v16, v52, 0, s11
-; GFX10-NEXT:    v_cndmask_b32_e64 v17, v53, 0x7ff80000, s11
-; GFX10-NEXT:    v_cndmask_b32_e64 v18, v50, 0, s12
-; GFX10-NEXT:    v_cndmask_b32_e64 v19, v51, 0x7ff80000, s12
-; GFX10-NEXT:    v_cndmask_b32_e64 v20, v64, 0, s13
-; GFX10-NEXT:    v_cndmask_b32_e64 v21, v65, 0x7ff80000, s13
-; GFX10-NEXT:    v_cndmask_b32_e64 v22, v70, 0, s14
-; GFX10-NEXT:    v_cndmask_b32_e64 v23, v71, 0x7ff80000, s14
-; GFX10-NEXT:    s_waitcnt vmcnt(6)
-; GFX10-NEXT:    v_min_f64 v[68:69], v[24:25], v[66:67]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s15, v[24:25], v[66:67]
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[66:67], v[66:67]
+; GFX10-NEXT:    s_waitcnt vmcnt(7)
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[68:69], v[68:69]
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v36, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, v38, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v9, v9, v49, s7
+; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, v48, s7
+; GFX10-NEXT:    v_cmp_u_f64_e64 s7, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, v11, v51, s8
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, v10, v50, s8
+; GFX10-NEXT:    v_cmp_u_f64_e64 s8, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, v4, v35, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v37, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v13, v13, v53, s9
 ; GFX10-NEXT:    s_waitcnt vmcnt(5)
-; GFX10-NEXT:    v_min_f64 v[66:67], v[26:27], v[0:1]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s16, v[26:27], v[0:1]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[70:71], v[70:71]
 ; GFX10-NEXT:    s_waitcnt vmcnt(3)
-; GFX10-NEXT:    v_min_f64 v[80:81], v[28:29], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s17, v[28:29], v[2:3]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[80:81], v[80:81]
+; GFX10-NEXT:    v_cndmask_b32_e64 v12, v12, v52, s9
+; GFX10-NEXT:    v_cndmask_b32_e64 v15, v15, v55, s10
+; GFX10-NEXT:    v_cndmask_b32_e64 v17, v17, v65, s11
+; GFX10-NEXT:    v_cndmask_b32_e64 v14, v14, v54, s10
+; GFX10-NEXT:    v_cndmask_b32_e64 v16, v16, v64, s11
+; GFX10-NEXT:    v_cmp_u_f64_e64 s9, v[4:5], v[4:5]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s10, v[6:7], v[6:7]
+; GFX10-NEXT:    v_cndmask_b32_e32 v19, v19, v67, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v18, v18, v66, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[8:9], v[8:9]
+; GFX10-NEXT:    v_cndmask_b32_e64 v21, v21, v69, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v20, v20, v68, s4
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[12:13], v[12:13]
+; GFX10-NEXT:    v_cndmask_b32_e64 v32, v32, v1, s7
+; GFX10-NEXT:    v_cndmask_b32_e64 v31, v31, v0, s7
+; GFX10-NEXT:    v_cmp_u_f64_e64 s11, v[10:11], v[10:11]
+; GFX10-NEXT:    v_cndmask_b32_e64 v34, v34, v3, s8
+; GFX10-NEXT:    v_cndmask_b32_e64 v33, v33, v2, s8
+; GFX10-NEXT:    v_cmp_u_f64_e64 s12, v[18:19], v[18:19]
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[31:32]
+; GFX10-NEXT:    s_clause 0x1
+; GFX10-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:120
+; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:116
+; GFX10-NEXT:    v_cndmask_b32_e64 v23, v23, v71, s5
+; GFX10-NEXT:    v_min_f64 v[2:3], v[2:3], v[33:34]
+; GFX10-NEXT:    s_clause 0x1
+; GFX10-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:128
+; GFX10-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:124
+; GFX10-NEXT:    v_cndmask_b32_e64 v25, v25, v81, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v22, v22, v70, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v24, v24, v80, s6
+; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[14:15], v[14:15]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[16:17], v[16:17]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s13, v[20:21], v[20:21]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s7, v[22:23], v[22:23]
+; GFX10-NEXT:    v_cndmask_b32_e32 v49, v49, v9, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v48, v48, v8, vcc_lo
+; GFX10-NEXT:    s_waitcnt vmcnt(5)
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[82:83], v[82:83]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s14, v[24:25], v[24:25]
+; GFX10-NEXT:    v_cndmask_b32_e64 v53, v53, v13, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v52, v52, v12, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v36, v36, v5, s9
+; GFX10-NEXT:    v_cndmask_b32_e64 v35, v35, v4, s9
+; GFX10-NEXT:    v_cndmask_b32_e64 v38, v38, v7, s10
+; GFX10-NEXT:    v_cndmask_b32_e64 v37, v37, v6, s10
+; GFX10-NEXT:    v_cndmask_b32_e64 v51, v51, v11, s11
+; GFX10-NEXT:    v_cndmask_b32_e64 v50, v50, v10, s11
+; GFX10-NEXT:    v_cndmask_b32_e64 v67, v67, v19, s12
+; GFX10-NEXT:    v_cndmask_b32_e64 v66, v66, v18, s12
+; GFX10-NEXT:    v_min_f64 v[4:5], v[4:5], v[35:36]
+; GFX10-NEXT:    v_min_f64 v[6:7], v[6:7], v[37:38]
+; GFX10-NEXT:    v_min_f64 v[8:9], v[8:9], v[48:49]
+; GFX10-NEXT:    v_min_f64 v[10:11], v[10:11], v[50:51]
+; GFX10-NEXT:    v_cndmask_b32_e64 v55, v55, v15, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v54, v54, v14, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v65, v65, v17, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v64, v64, v16, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v69, v69, v21, s13
+; GFX10-NEXT:    v_cndmask_b32_e64 v68, v68, v20, s13
+; GFX10-NEXT:    v_cndmask_b32_e32 v27, v27, v83, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v26, v26, v82, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v71, v71, v23, s7
+; GFX10-NEXT:    v_cndmask_b32_e64 v70, v70, v22, s7
+; GFX10-NEXT:    v_cndmask_b32_e64 v81, v81, v25, s14
+; GFX10-NEXT:    v_cndmask_b32_e64 v80, v80, v24, s14
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[26:27], v[26:27]
+; GFX10-NEXT:    v_min_f64 v[12:13], v[12:13], v[52:53]
+; GFX10-NEXT:    v_min_f64 v[14:15], v[14:15], v[54:55]
+; GFX10-NEXT:    v_min_f64 v[16:17], v[16:17], v[64:65]
+; GFX10-NEXT:    v_min_f64 v[18:19], v[18:19], v[66:67]
+; GFX10-NEXT:    v_min_f64 v[20:21], v[20:21], v[68:69]
+; GFX10-NEXT:    v_min_f64 v[22:23], v[22:23], v[70:71]
+; GFX10-NEXT:    v_min_f64 v[24:25], v[24:25], v[80:81]
+; GFX10-NEXT:    v_cndmask_b32_e32 v83, v83, v27, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v82, v82, v26, vcc_lo
+; GFX10-NEXT:    v_min_f64 v[26:27], v[26:27], v[82:83]
+; GFX10-NEXT:    s_waitcnt vmcnt(2)
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[31:32], v[31:32]
+; GFX10-NEXT:    v_cndmask_b32_e32 v29, v29, v32, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v28, v28, v31, vcc_lo
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_min_f64 v[86:87], v[30:31], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s18, v[30:31], v[4:5]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v82, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v83, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v84, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v85, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v32, 0, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v33, 0x7ff80000, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v24, v68, 0, s15
-; GFX10-NEXT:    v_cndmask_b32_e64 v25, v69, 0x7ff80000, s15
-; GFX10-NEXT:    v_cndmask_b32_e64 v26, v66, 0, s16
-; GFX10-NEXT:    v_cndmask_b32_e64 v27, v67, 0x7ff80000, s16
-; GFX10-NEXT:    v_cndmask_b32_e64 v28, v80, 0, s17
-; GFX10-NEXT:    v_cndmask_b32_e64 v29, v81, 0x7ff80000, s17
-; GFX10-NEXT:    v_cndmask_b32_e64 v30, v86, 0, s18
-; GFX10-NEXT:    v_cndmask_b32_e64 v31, v87, 0x7ff80000, s18
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[33:34], v[33:34]
+; GFX10-NEXT:    v_cndmask_b32_e32 v84, v30, v33, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v85, v39, v34, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[28:29], v[28:29]
+; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[84:85], v[84:85]
+; GFX10-NEXT:    v_cndmask_b32_e32 v32, v32, v29, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v31, v31, v28, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v34, v34, v85, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v33, v33, v84, s4
+; GFX10-NEXT:    v_min_f64 v[28:29], v[28:29], v[31:32]
+; GFX10-NEXT:    v_min_f64 v[30:31], v[84:85], v[33:34]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_v16f64:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    s_clause 0x1f
-; GFX11-NEXT:    scratch_load_b32 v31, off, s32
-; GFX11-NEXT:    scratch_load_b32 v33, off, s32 offset:8
-; GFX11-NEXT:    scratch_load_b32 v32, off, s32 offset:4
-; GFX11-NEXT:    scratch_load_b32 v35, off, s32 offset:16
-; GFX11-NEXT:    scratch_load_b32 v34, off, s32 offset:12
-; GFX11-NEXT:    scratch_load_b32 v37, off, s32 offset:24
-; GFX11-NEXT:    scratch_load_b32 v36, off, s32 offset:20
-; GFX11-NEXT:    scratch_load_b32 v39, off, s32 offset:32
-; GFX11-NEXT:    scratch_load_b32 v38, off, s32 offset:28
-; GFX11-NEXT:    scratch_load_b32 v49, off, s32 offset:40
-; GFX11-NEXT:    scratch_load_b32 v48, off, s32 offset:36
-; GFX11-NEXT:    scratch_load_b32 v51, off, s32 offset:48
-; GFX11-NEXT:    scratch_load_b32 v50, off, s32 offset:44
-; GFX11-NEXT:    scratch_load_b32 v53, off, s32 offset:56
-; GFX11-NEXT:    scratch_load_b32 v52, off, s32 offset:52
-; GFX11-NEXT:    scratch_load_b32 v55, off, s32 offset:64
-; GFX11-NEXT:    scratch_load_b32 v54, off, s32 offset:60
+; GFX11-NEXT:    scratch_load_b32 v32, off, s32 offset:64
+; GFX11-NEXT:    scratch_load_b32 v34, off, s32 offset:56
+; GFX11-NEXT:    scratch_load_b32 v36, off, s32 offset:48
+; GFX11-NEXT:    scratch_load_b32 v38, off, s32 offset:40
+; GFX11-NEXT:    scratch_load_b32 v49, off, s32 offset:32
+; GFX11-NEXT:    scratch_load_b32 v51, off, s32 offset:24
+; GFX11-NEXT:    scratch_load_b32 v53, off, s32 offset:16
+; GFX11-NEXT:    scratch_load_b32 v55, off, s32 offset:8
+; GFX11-NEXT:    scratch_load_b32 v54, off, s32 offset:4
+; GFX11-NEXT:    scratch_load_b32 v52, off, s32 offset:12
+; GFX11-NEXT:    scratch_load_b32 v50, off, s32 offset:20
+; GFX11-NEXT:    scratch_load_b32 v48, off, s32 offset:28
+; GFX11-NEXT:    scratch_load_b32 v37, off, s32 offset:36
+; GFX11-NEXT:    scratch_load_b32 v35, off, s32 offset:44
+; GFX11-NEXT:    scratch_load_b32 v33, off, s32 offset:52
+; GFX11-NEXT:    scratch_load_b32 v31, off, s32 offset:60
 ; GFX11-NEXT:    scratch_load_b32 v65, off, s32 offset:72
 ; GFX11-NEXT:    scratch_load_b32 v64, off, s32 offset:68
 ; GFX11-NEXT:    scratch_load_b32 v67, off, s32 offset:80
@@ -2675,86 +3515,135 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX11-NEXT:    scratch_load_b32 v84, off, s32 offset:116
 ; GFX11-NEXT:    scratch_load_b32 v87, off, s32 offset:128
 ; GFX11-NEXT:    scratch_load_b32 v86, off, s32 offset:124
-; GFX11-NEXT:    s_waitcnt vmcnt(30)
-; GFX11-NEXT:    v_min_f64 v[96:97], v[0:1], v[32:33]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[32:33]
-; GFX11-NEXT:    s_waitcnt vmcnt(28)
-; GFX11-NEXT:    v_min_f64 v[32:33], v[2:3], v[34:35]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[34:35]
-; GFX11-NEXT:    s_waitcnt vmcnt(26)
-; GFX11-NEXT:    v_min_f64 v[34:35], v[4:5], v[36:37]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[36:37]
+; GFX11-NEXT:    scratch_load_b32 v39, off, s32
 ; GFX11-NEXT:    s_waitcnt vmcnt(24)
-; GFX11-NEXT:    v_min_f64 v[36:37], v[6:7], v[38:39]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[6:7], v[38:39]
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[54:55], v[54:55]
+; GFX11-NEXT:    s_waitcnt vmcnt(23)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[52:53], v[52:53]
 ; GFX11-NEXT:    s_waitcnt vmcnt(22)
-; GFX11-NEXT:    v_min_f64 v[38:39], v[8:9], v[48:49]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s3, v[8:9], v[48:49]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[50:51], v[50:51]
+; GFX11-NEXT:    s_waitcnt vmcnt(21)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[48:49], v[48:49]
 ; GFX11-NEXT:    s_waitcnt vmcnt(20)
-; GFX11-NEXT:    v_min_f64 v[48:49], v[10:11], v[50:51]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s4, v[10:11], v[50:51]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s3, v[37:38], v[37:38]
+; GFX11-NEXT:    s_waitcnt vmcnt(19)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s4, v[35:36], v[35:36]
 ; GFX11-NEXT:    s_waitcnt vmcnt(18)
-; GFX11-NEXT:    v_min_f64 v[50:51], v[12:13], v[52:53]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s5, v[12:13], v[52:53]
-; GFX11-NEXT:    s_waitcnt vmcnt(16)
-; GFX11-NEXT:    v_min_f64 v[52:53], v[14:15], v[54:55]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s6, v[14:15], v[54:55]
-; GFX11-NEXT:    s_waitcnt vmcnt(14)
-; GFX11-NEXT:    v_min_f64 v[54:55], v[16:17], v[64:65]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s7, v[16:17], v[64:65]
-; GFX11-NEXT:    s_waitcnt vmcnt(12)
-; GFX11-NEXT:    v_min_f64 v[64:65], v[18:19], v[66:67]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s8, v[18:19], v[66:67]
-; GFX11-NEXT:    s_waitcnt vmcnt(10)
-; GFX11-NEXT:    v_min_f64 v[66:67], v[20:21], v[68:69]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s9, v[20:21], v[68:69]
-; GFX11-NEXT:    s_waitcnt vmcnt(8)
-; GFX11-NEXT:    v_min_f64 v[68:69], v[22:23], v[70:71]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s10, v[22:23], v[70:71]
-; GFX11-NEXT:    s_waitcnt vmcnt(6)
-; GFX11-NEXT:    v_min_f64 v[70:71], v[24:25], v[80:81]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s11, v[24:25], v[80:81]
-; GFX11-NEXT:    s_waitcnt vmcnt(4)
-; GFX11-NEXT:    v_min_f64 v[80:81], v[26:27], v[82:83]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s12, v[26:27], v[82:83]
-; GFX11-NEXT:    s_waitcnt vmcnt(2)
-; GFX11-NEXT:    v_min_f64 v[82:83], v[28:29], v[84:85]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s13, v[28:29], v[84:85]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s5, v[33:34], v[33:34]
+; GFX11-NEXT:    s_waitcnt vmcnt(17)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s6, v[31:32], v[31:32]
+; GFX11-NEXT:    s_waitcnt vmcnt(15)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s7, v[64:65], v[64:65]
+; GFX11-NEXT:    s_waitcnt vmcnt(13)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s8, v[66:67], v[66:67]
+; GFX11-NEXT:    s_waitcnt vmcnt(11)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s9, v[68:69], v[68:69]
+; GFX11-NEXT:    s_waitcnt vmcnt(9)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s10, v[70:71], v[70:71]
+; GFX11-NEXT:    s_waitcnt vmcnt(7)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s11, v[80:81], v[80:81]
+; GFX11-NEXT:    s_waitcnt vmcnt(5)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s12, v[82:83], v[82:83]
+; GFX11-NEXT:    s_waitcnt vmcnt(3)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s13, v[84:85], v[84:85]
+; GFX11-NEXT:    s_waitcnt vmcnt(1)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s14, v[86:87], v[86:87]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v55 :: v_dual_cndmask_b32 v0, v0, v54
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, v53, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, v5, v51, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v7, v7, v49, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v9, v9, v38, s3
+; GFX11-NEXT:    v_cndmask_b32_e64 v11, v11, v36, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v13, v13, v34, s5
+; GFX11-NEXT:    v_cndmask_b32_e64 v15, v15, v32, s6
+; GFX11-NEXT:    v_cndmask_b32_e64 v17, v17, v65, s7
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, v52, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v19, v19, v67, s8
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, v4, v50, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v21, v21, v69, s9
+; GFX11-NEXT:    v_cndmask_b32_e64 v6, v6, v48, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v23, v23, v71, s10
+; GFX11-NEXT:    v_cndmask_b32_e64 v8, v8, v37, s3
+; GFX11-NEXT:    v_cndmask_b32_e64 v25, v25, v81, s11
+; GFX11-NEXT:    v_cndmask_b32_e64 v10, v10, v35, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v27, v27, v83, s12
+; GFX11-NEXT:    v_cndmask_b32_e64 v12, v12, v33, s5
+; GFX11-NEXT:    v_cndmask_b32_e64 v29, v29, v85, s13
+; GFX11-NEXT:    v_cndmask_b32_e64 v14, v14, v31, s6
+; GFX11-NEXT:    v_cndmask_b32_e64 v96, v30, v86, s14
+; GFX11-NEXT:    v_cndmask_b32_e64 v16, v16, v64, s7
+; GFX11-NEXT:    v_cndmask_b32_e64 v18, v18, v66, s8
+; GFX11-NEXT:    v_cndmask_b32_e64 v20, v20, v68, s9
+; GFX11-NEXT:    v_cndmask_b32_e64 v22, v22, v70, s10
+; GFX11-NEXT:    v_cndmask_b32_e64 v24, v24, v80, s11
+; GFX11-NEXT:    v_cndmask_b32_e64 v26, v26, v82, s12
+; GFX11-NEXT:    v_cndmask_b32_e64 v28, v28, v84, s13
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_min_f64 v[84:85], v[30:31], v[86:87]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s14, v[30:31], v[86:87]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v96, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v97, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v32, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v33, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v34, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, v35, 0x7ff80000, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v6, v36, 0, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v7, v37, 0x7ff80000, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v8, v38, 0, s3
-; GFX11-NEXT:    v_cndmask_b32_e64 v9, v39, 0x7ff80000, s3
-; GFX11-NEXT:    v_cndmask_b32_e64 v10, v48, 0, s4
-; GFX11-NEXT:    v_cndmask_b32_e64 v11, v49, 0x7ff80000, s4
-; GFX11-NEXT:    v_cndmask_b32_e64 v12, v50, 0, s5
-; GFX11-NEXT:    v_cndmask_b32_e64 v13, v51, 0x7ff80000, s5
-; GFX11-NEXT:    v_cndmask_b32_e64 v14, v52, 0, s6
-; GFX11-NEXT:    v_cndmask_b32_e64 v15, v53, 0x7ff80000, s6
-; GFX11-NEXT:    v_cndmask_b32_e64 v16, v54, 0, s7
-; GFX11-NEXT:    v_cndmask_b32_e64 v17, v55, 0x7ff80000, s7
-; GFX11-NEXT:    v_cndmask_b32_e64 v18, v64, 0, s8
-; GFX11-NEXT:    v_cndmask_b32_e64 v19, v65, 0x7ff80000, s8
-; GFX11-NEXT:    v_cndmask_b32_e64 v20, v66, 0, s9
-; GFX11-NEXT:    v_cndmask_b32_e64 v21, v67, 0x7ff80000, s9
-; GFX11-NEXT:    v_cndmask_b32_e64 v22, v68, 0, s10
-; GFX11-NEXT:    v_cndmask_b32_e64 v23, v69, 0x7ff80000, s10
-; GFX11-NEXT:    v_cndmask_b32_e64 v24, v70, 0, s11
-; GFX11-NEXT:    v_cndmask_b32_e64 v25, v71, 0x7ff80000, s11
-; GFX11-NEXT:    v_cndmask_b32_e64 v26, v80, 0, s12
-; GFX11-NEXT:    v_cndmask_b32_e64 v27, v81, 0x7ff80000, s12
-; GFX11-NEXT:    v_cndmask_b32_e64 v28, v82, 0, s13
-; GFX11-NEXT:    v_cndmask_b32_e64 v29, v83, 0x7ff80000, s13
-; GFX11-NEXT:    v_cndmask_b32_e64 v30, v84, 0, s14
-; GFX11-NEXT:    v_cndmask_b32_e64 v31, v85, 0x7ff80000, s14
+; GFX11-NEXT:    v_cndmask_b32_e64 v97, v39, v87, s14
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[2:3]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[4:5]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[6:7], v[6:7]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s3, v[8:9], v[8:9]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s4, v[10:11], v[10:11]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s5, v[12:13], v[12:13]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s6, v[14:15], v[14:15]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s7, v[16:17], v[16:17]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s8, v[18:19], v[18:19]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s9, v[20:21], v[20:21]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s10, v[22:23], v[22:23]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s11, v[24:25], v[24:25]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s12, v[26:27], v[26:27]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s13, v[28:29], v[28:29]
+; GFX11-NEXT:    v_cmp_u_f64_e64 s14, v[96:97], v[96:97]
+; GFX11-NEXT:    v_cndmask_b32_e32 v39, v55, v1, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v53, v53, v3, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v51, v51, v5, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v49, v49, v7, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v55, v38, v9, s3
+; GFX11-NEXT:    v_cndmask_b32_e64 v36, v36, v11, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v34, v34, v13, s5
+; GFX11-NEXT:    v_cndmask_b32_e64 v32, v32, v15, s6
+; GFX11-NEXT:    v_cndmask_b32_e64 v65, v65, v17, s7
+; GFX11-NEXT:    v_cndmask_b32_e64 v67, v67, v19, s8
+; GFX11-NEXT:    v_cndmask_b32_e64 v69, v69, v21, s9
+; GFX11-NEXT:    v_cndmask_b32_e64 v71, v71, v23, s10
+; GFX11-NEXT:    v_cndmask_b32_e64 v81, v81, v25, s11
+; GFX11-NEXT:    v_cndmask_b32_e64 v83, v83, v27, s12
+; GFX11-NEXT:    v_cndmask_b32_e32 v38, v54, v0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v85, v85, v29, s13
+; GFX11-NEXT:    v_cndmask_b32_e64 v52, v52, v2, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v87, v87, v97, s14
+; GFX11-NEXT:    v_cndmask_b32_e64 v50, v50, v4, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v48, v48, v6, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v54, v37, v8, s3
+; GFX11-NEXT:    v_cndmask_b32_e64 v35, v35, v10, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v33, v33, v12, s5
+; GFX11-NEXT:    v_cndmask_b32_e64 v31, v31, v14, s6
+; GFX11-NEXT:    v_cndmask_b32_e64 v64, v64, v16, s7
+; GFX11-NEXT:    v_cndmask_b32_e64 v66, v66, v18, s8
+; GFX11-NEXT:    v_cndmask_b32_e64 v68, v68, v20, s9
+; GFX11-NEXT:    v_cndmask_b32_e64 v70, v70, v22, s10
+; GFX11-NEXT:    v_cndmask_b32_e64 v80, v80, v24, s11
+; GFX11-NEXT:    v_cndmask_b32_e64 v82, v82, v26, s12
+; GFX11-NEXT:    v_cndmask_b32_e64 v84, v84, v28, s13
+; GFX11-NEXT:    v_cndmask_b32_e64 v86, v86, v96, s14
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[38:39]
+; GFX11-NEXT:    v_min_f64 v[2:3], v[2:3], v[52:53]
+; GFX11-NEXT:    v_min_f64 v[4:5], v[4:5], v[50:51]
+; GFX11-NEXT:    v_min_f64 v[6:7], v[6:7], v[48:49]
+; GFX11-NEXT:    v_min_f64 v[8:9], v[8:9], v[54:55]
+; GFX11-NEXT:    v_min_f64 v[10:11], v[10:11], v[35:36]
+; GFX11-NEXT:    v_min_f64 v[12:13], v[12:13], v[33:34]
+; GFX11-NEXT:    v_min_f64 v[14:15], v[14:15], v[31:32]
+; GFX11-NEXT:    v_min_f64 v[16:17], v[16:17], v[64:65]
+; GFX11-NEXT:    v_min_f64 v[18:19], v[18:19], v[66:67]
+; GFX11-NEXT:    v_min_f64 v[20:21], v[20:21], v[68:69]
+; GFX11-NEXT:    v_min_f64 v[22:23], v[22:23], v[70:71]
+; GFX11-NEXT:    v_min_f64 v[24:25], v[24:25], v[80:81]
+; GFX11-NEXT:    v_min_f64 v[26:27], v[26:27], v[82:83]
+; GFX11-NEXT:    v_min_f64 v[28:29], v[28:29], v[84:85]
+; GFX11-NEXT:    v_min_f64 v[30:31], v[96:97], v[86:87]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v16f64:
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-vgpr-to-sgpr-return.ll b/llvm/test/CodeGen/AMDGPU/uniform-vgpr-to-sgpr-return.ll
index 14b91793bd8da..3988767977bdc 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-vgpr-to-sgpr-return.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-vgpr-to-sgpr-return.ll
@@ -5,10 +5,14 @@
 define amdgpu_ps i32 @uniform_v_to_s_i32(float inreg %a, float inreg %b) {
 ; GFX11-LABEL: uniform_v_to_s_i32:
 ; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_max_f32_e64 v0, s0, s1
-; GFX11-NEXT:    v_cmp_o_f32_e64 vcc_lo, s0, s1
+; GFX11-NEXT:    v_mov_b32_e32 v0, s1
+; GFX11-NEXT:    v_cmp_u_f32_e64 vcc_lo, s1, s1
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, s0, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, s1, v0, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX11-NEXT:    ; return to shader part epilog
   %max0 = call float @llvm.maximum.f32(float %a, float %b)
@@ -19,14 +23,19 @@ define amdgpu_ps i32 @uniform_v_to_s_i32(float inreg %a, float inreg %b) {
 define amdgpu_ps i64 @uniform_v_to_s_i64(double inreg %a, double inreg %b) {
 ; GFX11-LABEL: uniform_v_to_s_i64:
 ; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cmp_u_f64_e64 s4, s[2:3], s[2:3]
+; GFX11-NEXT:    s_and_b32 s4, s4, exec_lo
+; GFX11-NEXT:    s_cselect_b32 s1, s3, s1
+; GFX11-NEXT:    s_cselect_b32 s0, s2, s0
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s4, s[0:1], s[0:1]
+; GFX11-NEXT:    s_and_b32 s4, s4, exec_lo
+; GFX11-NEXT:    s_cselect_b32 s3, s1, s3
+; GFX11-NEXT:    s_cselect_b32 s2, s0, s2
 ; GFX11-NEXT:    v_max_f64 v[0:1], s[0:1], s[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, s[0:1], s[2:3]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX11-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX11-NEXT:    ; return to shader part epilog
   %max0 = call double @llvm.maximum.f64(double %a, double %b)
   %cast = bitcast double %max0 to i64
@@ -36,16 +45,22 @@ define amdgpu_ps i64 @uniform_v_to_s_i64(double inreg %a, double inreg %b) {
 define amdgpu_ps <2 x i32> @uniform_v_to_s_2_i32(<2 x float> inreg %a, <2 x float> inreg %b) {
 ; GFX11-LABEL: uniform_v_to_s_2_i32:
 ; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_max_f32_e64 v0, s0, s2
-; GFX11-NEXT:    v_cmp_o_f32_e64 vcc_lo, s0, s2
-; GFX11-NEXT:    v_max_f32_e64 v1, s1, s3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e64 vcc_lo, s1, s3
-; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v1, vcc_lo
+; GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
+; GFX11-NEXT:    v_cmp_u_f32_e64 vcc_lo, s2, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, s0, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e64 vcc_lo, s3, s3
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, s1, v1, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, s2, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v3, s3, v1, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_max_f32 v1, v1, v3 :: v_dual_max_f32 v0, v0, v2
 ; GFX11-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX11-NEXT:    ; return to shader part epilog
   %max0 = call <2 x float> @llvm.maximum.f32(<2 x float> %a, <2 x float> %b)
   %cast = bitcast <2 x float> %max0 to <2 x i32>
@@ -59,10 +74,10 @@ define amdgpu_ps ptr @uniform_v_to_s_ptr(ptr inreg %x) {
 ; GFX11-NEXT:    s_mov_b32 s1, 0
 ; GFX11-NEXT:    flat_load_b32 v0, v[0:1]
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f32_e32 v1, 1.0, v0
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 1.0, v0, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
@@ -77,14 +92,19 @@ define amdgpu_ps ptr @uniform_v_to_s_ptr(ptr inreg %x) {
 define amdgpu_ps double @uniform_v_to_s_double(double inreg %a, double inreg %b) {
 ; GFX11-LABEL: uniform_v_to_s_double:
 ; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cmp_u_f64_e64 s4, s[2:3], s[2:3]
+; GFX11-NEXT:    s_and_b32 s4, s4, exec_lo
+; GFX11-NEXT:    s_cselect_b32 s1, s3, s1
+; GFX11-NEXT:    s_cselect_b32 s0, s2, s0
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s4, s[0:1], s[0:1]
+; GFX11-NEXT:    s_and_b32 s4, s4, exec_lo
+; GFX11-NEXT:    s_cselect_b32 s3, s1, s3
+; GFX11-NEXT:    s_cselect_b32 s2, s0, s2
 ; GFX11-NEXT:    v_max_f64 v[0:1], s[0:1], s[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, s[0:1], s[2:3]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX11-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX11-NEXT:    ; return to shader part epilog
   %max0 = call double @llvm.maximum.f64(double %a, double %b)
   ret double %max0
@@ -93,10 +113,14 @@ define amdgpu_ps double @uniform_v_to_s_double(double inreg %a, double inreg %b)
 define amdgpu_ps <2 x i16> @uniform_v_to_s_2_i16(float inreg %a, float inreg %b) {
 ; GFX11-LABEL: uniform_v_to_s_2_i16:
 ; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_max_f32_e64 v0, s0, s1
-; GFX11-NEXT:    v_cmp_o_f32_e64 vcc_lo, s0, s1
+; GFX11-NEXT:    v_mov_b32_e32 v0, s1
+; GFX11-NEXT:    v_cmp_u_f32_e64 vcc_lo, s1, s1
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, s0, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, s1, v0, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX11-NEXT:    ; return to shader part epilog
   %max0 = call float @llvm.maximum.f32(float %a, float %b)
@@ -107,19 +131,27 @@ define amdgpu_ps <2 x i16> @uniform_v_to_s_2_i16(float inreg %a, float inreg %b)
 define amdgpu_ps i16 @uniform_v_to_s_i16(half inreg %a, half inreg %b) {
 ; GFX11-TRUE16-LABEL: uniform_v_to_s_i16:
 ; GFX11-TRUE16:       ; %bb.0:
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, s0, s1
-; GFX11-TRUE16-NEXT:    v_max_f16_e64 v0.l, s0, s1
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, s1, s1
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, s1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, s0, v0.l, s2
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, s1, v0.l, vcc_lo
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, s2
+; GFX11-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.h
 ; GFX11-TRUE16-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX11-TRUE16-NEXT:    ; return to shader part epilog
 ;
 ; GFX11-FAKE16-LABEL: uniform_v_to_s_i16:
 ; GFX11-FAKE16:       ; %bb.0:
-; GFX11-FAKE16-NEXT:    v_max_f16_e64 v0, s0, s1
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s1
+; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, s1
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e64 vcc_lo, s1, s1
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, s0, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, s1, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX11-FAKE16-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX11-FAKE16-NEXT:    ; return to shader part epilog
   %max = call half @llvm.maximum.f16(half %a, half %b)
diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-fmaximum.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-fmaximum.ll
index 9c54c30daefe0..693d5d1d1930c 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-reduce-fmaximum.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-fmaximum.ll
@@ -14,59 +14,72 @@ define half @test_vector_reduce_fmaximum_v2half(<2 x half> %v) {
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT:    v_max_f32_e32 v3, v0, v1
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fmaximum_v2half:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX8-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fmaximum_v2half:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f16_sdwa v1, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fmaximum_v2half:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f16_sdwa v1, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v0 src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v1, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v2half:
 ; GFX11-SDAG-TRUE16:       ; %bb.0: ; %entry
 ; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v0.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v0.l, vcc_lo
 ; GFX11-SDAG-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.h
-; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
 ; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_fmaximum_v2half:
 ; GFX11-SDAG-FAKE16:       ; %bb.0: ; %entry
 ; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v2half:
@@ -102,81 +115,108 @@ define half @test_vector_reduce_fmaximum_v3half(<3 x half> %v) {
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT:    v_max_f32_e32 v3, v0, v1
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fmaximum_v3half:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
-; GFX8-NEXT:    v_max_f16_e32 v3, v0, v2
-; GFX8-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX8-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fmaximum_v3half:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f16_sdwa v2, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fmaximum_v3half:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f16_sdwa v2, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v0 src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX10-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v3half:
 ; GFX11-SDAG-TRUE16:       ; %bb.0: ; %entry
 ; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v0.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.l, v1.l
 ; GFX11-SDAG-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.h
 ; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
-; GFX11-SDAG-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.l, v0.l, vcc_lo
 ; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.h
 ; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_fmaximum_v3half:
 ; GFX11-SDAG-FAKE16:       ; %bb.0: ; %entry
 ; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT:    v_max_f16_e32 v3, v0, v2
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v3, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v3half:
@@ -225,101 +265,144 @@ define half @test_vector_reduce_fmaximum_v4half(<4 x half> %v) {
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX7-NEXT:    v_max_f32_e32 v4, v0, v1
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v3
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fmaximum_v4half:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT:    v_max_f16_e32 v4, v0, v3
-; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX8-NEXT:    v_max_f16_e32 v3, v0, v1
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v3
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX8-NEXT:    v_max_f16_e32 v1, v0, v2
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fmaximum_v4half:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f16_sdwa v2, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX9-NEXT:    v_max_f16_e32 v0, v0, v3
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT:    v_max_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fmaximum_v4half:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f16_sdwa v2, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v0 src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX10-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX10-NEXT:    v_max_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v1 src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX10-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v4half:
 ; GFX11-SDAG-TRUE16:       ; %bb.0: ; %entry
 ; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v0.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.l, v1.l
 ; GFX11-SDAG-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.h
 ; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
-; GFX11-SDAG-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v1.l
-; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.h
-; GFX11-SDAG-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v1.h
-; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.l, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.h, v1.h
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.h, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.h
 ; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_fmaximum_v4half:
 ; GFX11-SDAG-FAKE16:       ; %bb.0: ; %entry
 ; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT:    v_max_f16_e32 v3, v0, v2
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v3, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT:    v_max_f16_e32 v1, v0, v3
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v3
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v4half:
@@ -361,190 +444,313 @@ define half @test_vector_reduce_fmaximum_v8half(<8 x half> %v) {
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT:    v_max_f32_e32 v8, v0, v1
-; GFX7-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v3
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v6, v6
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v4
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v5
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v5
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v6
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v6
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v7
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v7
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fmaximum_v8half:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 16, v0
-; GFX8-NEXT:    v_max_f16_e32 v8, v0, v7
-; GFX8-NEXT:    v_mov_b32_e32 v9, 0x7e00
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v7
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX8-NEXT:    v_max_f16_e32 v7, v0, v1
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v7
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v7, vcc
-; GFX8-NEXT:    v_max_f16_e32 v1, v0, v6
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v6
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX8-NEXT:    v_max_f16_e32 v1, v0, v2
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX8-NEXT:    v_max_f16_e32 v1, v0, v5
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v5
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX8-NEXT:    v_max_f16_e32 v1, v0, v3
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX8-NEXT:    v_max_f16_e32 v1, v0, v4
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v4
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fmaximum_v8half:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v4, v0, v2
-; GFX9-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cndmask_b32_sdwa v5, v0, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT:    v_max_f16_e32 v2, v6, v0
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v6, v0
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_perm_b32 v2, v4, v2, s0
+; GFX9-NEXT:    v_perm_b32 v0, v5, v0, s0
+; GFX9-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX9-NEXT:    v_pk_max_f16 v2, v1, v3
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
-; GFX9-NEXT:    v_max_f16_e32 v6, v0, v4
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v4
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v6, vcc
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_sdwa v1, v5, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX9-NEXT:    v_perm_b32 v1, v2, v1, s0
+; GFX9-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc
+; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX9-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fmaximum_v8half:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_pk_max_f16 v4, v0, v2
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
-; GFX10-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v4, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_pk_max_f16 v2, v1, v3
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v5, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX10-NEXT:    v_max_f16_e32 v5, v4, v0
-; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v2, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v0
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v5, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_mov_b32_e32 v1, 0x7e00
-; GFX10-NEXT:    v_max_f16_e32 v4, v0, v6
-; GFX10-NEXT:    v_cmp_o_f16_e64 s4, v0, v6
-; GFX10-NEXT:    v_cndmask_b32_sdwa v1, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0x7e00, v4, s4
-; GFX10-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, v2, v2
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, v0, v2, s4
+; GFX10-NEXT:    v_cndmask_b32_sdwa v0, v0, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_perm_b32 v0, v0, v5, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
+; GFX10-NEXT:    v_perm_b32 v2, v4, v2, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v5, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_perm_b32 v2, v4, v3, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX10-NEXT:    v_pk_max_f16 v1, v1, v2
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v5, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_max_f16_e32 v0, v0, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX10-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v8half:
 ; GFX11-SDAG-TRUE16:       ; %bb.0: ; %entry
 ; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v2.l
-; GFX11-SDAG-TRUE16-NEXT:    v_pk_max_f16 v4, v0, v2
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v0.h, v2.h
-; GFX11-SDAG-TRUE16-NEXT:    v_pk_max_f16 v2, v1, v3
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2.h, v2.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v2.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v2.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v2.l, s0
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.h, v2.h, v0.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.l, v2.l, v0.l, s0
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3.h, v3.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v3.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v1.h, v1.h, v3.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, v3.l, s0
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v1.h, v1.h
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v1.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v0.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.h, v3.h, v1.h, s0
 ; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v4.l, vcc_lo
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v4.h, s0
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1.l, v3.l
-; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.l, v3.l, v1.l, s1
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT:    v_pk_max_f16 v1, v1, v2
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-SDAG-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.h
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v2.l, vcc_lo
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1.h, v3.h
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, s0
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.l, vcc_lo
 ; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.l, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.h, v1.h
 ; GFX11-SDAG-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.h
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v2.h, vcc_lo
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, s0
-; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.h, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-SDAG-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.h
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
 ; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_fmaximum_v8half:
 ; GFX11-SDAG-FAKE16:       ; %bb.0: ; %entry
 ; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-FAKE16-NEXT:    v_pk_max_f16 v4, v0, v2
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
-; GFX11-SDAG-FAKE16-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v0, v5, v0, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v2, v4, v2, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX11-SDAG-FAKE16-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v5, v4, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v2, v4, v3, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_pk_max_f16 v1, v1, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v5, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_max_f16_e32 v0, v0, v3
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc_lo
 ; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX11-SDAG-FAKE16-NEXT:    v_max_f16_e32 v5, v0, v2
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v4, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v3
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v5, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT:    v_max_f16_e32 v3, v0, v6
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v6
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v3, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v2
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v8half:
@@ -591,381 +797,635 @@ define half @test_vector_reduce_fmaximum_v16half(<16 x half> %v) {
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT:    v_max_f32_e32 v16, v0, v1
-; GFX7-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v3
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v8, v8
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v6, v6
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v4
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v9, v9
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v8, v8
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v5
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v5
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v10, v10
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v9, v9
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v8, v8
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v6
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v6
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v11, v11
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v10, v10
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v9, v9
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v7
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v7
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v12, v12
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v8, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v11, v11
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v10, v10
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v8
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v8
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v13, v13
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v12, v12
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v11, v11
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v9
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v9
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v14, v14
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v10, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v13, v13
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v12, v12
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v10
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v10
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v15, v15
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v11, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v14, v14
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v13, v13
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v11
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v11
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v12, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v15, v15
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v14, v14
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v12
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v12
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v13, v0, vcc
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v15, v15
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v13
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v13
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v14
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v14
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v15
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v15
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v14, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v15, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v15, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fmaximum_v16half:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v15, 16, v0
-; GFX8-NEXT:    v_max_f16_e32 v16, v0, v15
-; GFX8-NEXT:    v_mov_b32_e32 v17, 0x7e00
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v15
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX8-NEXT:    v_max_f16_e32 v15, v0, v1
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v15, v15
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v15, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v15, v15, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v15
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v14, 16, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v15, vcc
-; GFX8-NEXT:    v_max_f16_e32 v1, v0, v14
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v14
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f16_e32 v1, v0, v2
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v14, v14
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v14, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v13, 16, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f16_e32 v1, v0, v13
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v13
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f16_e32 v1, v0, v3
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v13, v13
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v13, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 16, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f16_e32 v1, v0, v12
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v12
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f16_e32 v1, v0, v4
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v12, v12
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v12, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v11, 16, v4
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f16_e32 v1, v0, v11
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v11
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f16_e32 v1, v0, v5
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v11, v11
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v11, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 16, v5
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f16_e32 v1, v0, v10
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v10
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f16_e32 v1, v0, v6
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v6
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v10, v10
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v10, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 16, v6
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f16_e32 v1, v0, v9
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v9
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f16_e32 v1, v0, v7
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v7
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 16, v7
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f16_e32 v1, v0, v8
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v8
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v8, v0, vcc
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fmaximum_v16half:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v8, v2, v6
-; GFX9-NEXT:    v_mov_b32_e32 v9, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v2, v6
+; GFX9-NEXT:    v_lshrrev_b32_e32 v9, 16, v4
+; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 16, v0
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
 ; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
 ; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v10, v9, v8, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 16, v8
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v2, v9, v8, vcc
-; GFX9-NEXT:    v_pk_max_f16 v8, v0, v4
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v4
-; GFX9-NEXT:    v_perm_b32 v6, v2, v10, s0
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v11, v9, v8, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 16, v8
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX9-NEXT:    v_perm_b32 v4, v0, v11, s0
-; GFX9-NEXT:    v_pk_max_f16 v4, v4, v6
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v11, v10
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v6, v9, v4, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v4, vcc
-; GFX9-NEXT:    v_max_f16_e32 v2, v6, v0
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v6, v0
-; GFX9-NEXT:    v_pk_max_f16 v6, v1, v5
+; GFX9-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX9-NEXT:    v_perm_b32 v4, v9, v4, s0
+; GFX9-NEXT:    v_perm_b32 v0, v8, v0, s0
+; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
+; GFX9-NEXT:    v_pk_max_f16 v0, v0, v4
+; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v8, v8, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v6, v6, v2, vcc
+; GFX9-NEXT:    v_perm_b32 v6, v8, v6, s0
+; GFX9-NEXT:    v_perm_b32 v2, v4, v2, s0
+; GFX9-NEXT:    v_pk_max_f16 v2, v2, v6
 ; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v2, vcc
-; GFX9-NEXT:    v_pk_max_f16 v2, v3, v7
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v3, v7
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v4, v9, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cndmask_b32_e32 v6, v2, v4, vcc
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_perm_b32 v2, v2, v6, s0
+; GFX9-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX9-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v5
+; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
+; GFX9-NEXT:    v_perm_b32 v4, v4, v5, s0
+; GFX9-NEXT:    v_perm_b32 v1, v2, v1, s0
+; GFX9-NEXT:    v_pk_max_f16 v1, v1, v4
+; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v7
+; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v3
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v5
-; GFX9-NEXT:    v_perm_b32 v3, v2, v4, s0
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX9-NEXT:    v_perm_b32 v2, v2, v3, s0
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cndmask_b32_e32 v5, v7, v3, vcc
+; GFX9-NEXT:    v_perm_b32 v4, v4, v5, s0
+; GFX9-NEXT:    v_pk_max_f16 v2, v2, v4
 ; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v7, v9, v6, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v9, v6, vcc
-; GFX9-NEXT:    v_perm_b32 v5, v1, v7, s0
-; GFX9-NEXT:    v_pk_max_f16 v3, v5, v3
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v7, v4
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v1, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v3, vcc
+; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v4, v9, v3, vcc
-; GFX9-NEXT:    v_max_f16_e32 v5, v0, v4
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v4
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v5, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v1, vcc
+; GFX9-NEXT:    v_perm_b32 v2, v2, v4, s0
+; GFX9-NEXT:    v_perm_b32 v1, v1, v3, s0
+; GFX9-NEXT:    v_pk_max_f16 v1, v1, v2
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_sdwa v1, v9, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc
+; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX9-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fmaximum_v16half:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_pk_max_f16 v8, v2, v6
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v6
-; GFX10-NEXT:    v_pk_max_f16 v9, v0, v4
-; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 16, v8
-; GFX10-NEXT:    v_cndmask_b32_e32 v8, 0x7e00, v8, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_lshrrev_b32_e32 v11, 16, v9
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v10, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v4
-; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v9, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_pk_max_f16 v9, v3, v7
-; GFX10-NEXT:    v_perm_b32 v4, v2, v8, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v11, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
-; GFX10-NEXT:    v_lshrrev_b32_e32 v12, 16, v9
-; GFX10-NEXT:    v_pk_max_f16 v11, v1, v5
-; GFX10-NEXT:    v_perm_b32 v10, v0, v6, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v9, 0x7e00, v9, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_pk_max_f16 v4, v10, v4
-; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 16, v11
-; GFX10-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v12, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v5
-; GFX10-NEXT:    v_cndmask_b32_e32 v7, 0x7e00, v11, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v10, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v8
-; GFX10-NEXT:    v_perm_b32 v6, v3, v9, 0x5040100
-; GFX10-NEXT:    v_perm_b32 v8, v1, v7, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v4, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX10-NEXT:    v_pk_max_f16 v2, v8, v6
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v5, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v7, v9
-; GFX10-NEXT:    v_max_f16_e32 v5, v4, v0
-; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v2, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v0
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v5, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX10-NEXT:    v_mov_b32_e32 v1, 0x7e00
-; GFX10-NEXT:    v_max_f16_e32 v4, v0, v6
-; GFX10-NEXT:    v_cmp_o_f16_e64 s4, v0, v6
-; GFX10-NEXT:    v_cndmask_b32_sdwa v1, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0x7e00, v4, s4
-; GFX10-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
+; GFX10-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
+; GFX10-NEXT:    v_lshrrev_b32_e32 v11, 16, v6
+; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8, v8
+; GFX10-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX10-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v11, v11
+; GFX10-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v10, v10
+; GFX10-NEXT:    v_perm_b32 v0, v9, v0, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v4, v8, v4, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v11, v11, v10, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_lshrrev_b32_e32 v8, 16, v1
+; GFX10-NEXT:    v_pk_max_f16 v0, v0, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v2, vcc_lo
+; GFX10-NEXT:    v_perm_b32 v2, v10, v2, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 16, v3
+; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX10-NEXT:    v_perm_b32 v6, v11, v6, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v11, 16, v7
+; GFX10-NEXT:    v_pk_max_f16 v2, v2, v6
+; GFX10-NEXT:    v_lshrrev_b32_e32 v6, 16, v5
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_lshrrev_b32_e32 v9, 16, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_cndmask_b32_e32 v8, v8, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v9, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8, v8
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v11, v11
+; GFX10-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v10, v10
+; GFX10-NEXT:    v_perm_b32 v1, v8, v1, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v5, v6, v5, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v11, v11, v10, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_pk_max_f16 v1, v1, v5
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_perm_b32 v3, v10, v3, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
+; GFX10-NEXT:    v_perm_b32 v6, v11, v7, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_perm_b32 v0, v4, v0, 0x5040100
+; GFX10-NEXT:    v_pk_max_f16 v3, v3, v6
+; GFX10-NEXT:    v_cndmask_b32_e32 v9, v9, v4, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_perm_b32 v2, v9, v2, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v5, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX10-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v5, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX10-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v16half:
 ; GFX11-SDAG-TRUE16:       ; %bb.0: ; %entry
 ; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2.l, v6.l
-; GFX11-SDAG-TRUE16-NEXT:    v_pk_max_f16 v8, v2, v6
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v2.h, v6.h
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v0.l, v4.l
-; GFX11-SDAG-TRUE16-NEXT:    v_pk_max_f16 v2, v0, v4
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, v0.h, v4.h
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v8.l, vcc_lo
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v8.h, s0
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v3.l, v7.l
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.l, 0x7e00, v2.l, s1
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.h, 0x7e00, v2.h, s2
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v3.h, v7.h
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, v1.l, v5.l
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e64 s3, v1.h, v5.h
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2.l, v0.l
-; GFX11-SDAG-TRUE16-NEXT:    v_pk_max_f16 v4, v2, v0
-; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_3)
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v4.l, vcc_lo
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2.h, v0.h
-; GFX11-SDAG-TRUE16-NEXT:    v_pk_max_f16 v2, v3, v7
-; GFX11-SDAG-TRUE16-NEXT:    v_pk_max_f16 v3, v1, v5
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v4.h, vcc_lo
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v2.l, s0
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6.h, v6.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v6.l, v6.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v4.h, v4.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v4.l, v4.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.h, v2.h, v6.h, vcc_lo
 ; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x7e00, v2.h, s1
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.l, 0x7e00, v3.l, s2
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.h, 0x7e00, v3.h, s3
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v0.h
-; GFX11-SDAG-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.l, v2.l, v6.l, s0
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v4.h, s1
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v4.l, s2
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2.h, v2.h
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v2.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v0.h, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v6.h, v6.h, v2.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v6.l, v6.l, v2.l, s0
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5.h, v5.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v4.h, v4.h, v0.h, s1
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v4.l, v4.l, v0.l, s2
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v7.h, v7.h
+; GFX11-SDAG-TRUE16-NEXT:    v_pk_max_f16 v2, v2, v6
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v7.l, v7.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v5.l, v5.l
+; GFX11-SDAG-TRUE16-NEXT:    v_pk_max_f16 v0, v0, v4
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v1.h, v1.h, v5.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s3, v2.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v3.h, v3.h, v7.h, s1
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v3.l, v3.l, v7.l, s2
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2.h, v2.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, v5.l, s0
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v2.l, s3
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v3.h, v3.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v3.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v2.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.h, v1.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v1.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v4.h, v7.h, v3.h, s0
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v4.l, v7.l, v3.l, s1
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v0.h, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v5.h, v5.h, v1.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v5.l, v5.l, v1.l, s2
+; GFX11-SDAG-TRUE16-NEXT:    v_pk_max_f16 v3, v3, v4
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.l, v2.l, v0.l, s0
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.h, v2.h, v0.h, s1
 ; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v2.l, v1.l
-; GFX11-SDAG-TRUE16-NEXT:    v_pk_max_f16 v3, v2, v1
+; GFX11-SDAG-TRUE16-NEXT:    v_pk_max_f16 v1, v1, v5
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v3.h, v3.h
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, v3.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v1.h, v1.h, v3.h, s0
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v1.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v1.h, v1.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v0.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.l, v3.l, v1.l, s0
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.h, v3.h, v1.h, s1
 ; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v3.l, s0
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v2.h, v1.h
-; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_pk_max_f16 v1, v1, v2
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.l, v1.l
 ; GFX11-SDAG-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.h
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v3.h, s0
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.l, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.h, v1.h
 ; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v0.h
 ; GFX11-SDAG-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.h
-; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.h, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.h
 ; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_fmaximum_v16half:
 ; GFX11-SDAG-FAKE16:       ; %bb.0: ; %entry
 ; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-FAKE16-NEXT:    v_pk_max_f16 v8, v2, v6
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v6
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX11-SDAG-FAKE16-NEXT:    v_pk_max_f16 v10, v0, v4
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v4
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v9, 0x7e00, v8, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v8
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v6
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v12, 16, v0
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v13, 16, v10
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v14, 16, v1
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v8, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v4
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v6, v2, v9, 0x5040100
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v10, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v12, v11
-; GFX11-SDAG-FAKE16-NEXT:    v_pk_max_f16 v10, v3, v7
-; GFX11-SDAG-FAKE16-NEXT:    v_pk_max_f16 v12, v1, v5
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v13, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v13, 16, v5
-; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v8, v4, v0, 0x5040100
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v15, 16, v12
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4)
-; GFX11-SDAG-FAKE16-NEXT:    v_pk_max_f16 v6, v8, v6
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v8, 0x7e00, v10, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v10
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
 ; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v6
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v10, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v5
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v12, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v14, v13
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v5, 0x7e00, v15, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v9
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v6, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v2
-; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v4, v3, v8, 0x5040100
-; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v6, v5, v1, 0x5040100
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v11, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8, v8
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v11, v11
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v10, v10
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v0, v9, v0, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v11, v11, v10, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v4, v8, v4, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v6, v6, v2, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v2, v10, v2, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-SDAG-FAKE16-NEXT:    v_pk_max_f16 v0, v0, v4
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v3
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v6, v11, v6, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v7
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_pk_max_f16 v2, v2, v6
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v5
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v8, v8, v6, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v4, v9, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8, v8
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v11, v11
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v10, v10
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v1, v8, v1, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v5, v6, v5, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v11, v11, v10, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX11-SDAG-FAKE16-NEXT:    v_pk_max_f16 v1, v1, v5
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v7, v7, v3, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v3, v10, v3, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v6, v11, v7, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-SDAG-FAKE16-NEXT:    v_pk_max_f16 v3, v3, v6
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v9, v9, v4, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v0, v4, v0, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v2, v9, v2, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-SDAG-FAKE16-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v5, v4, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v5, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_max_f16_e32 v0, v0, v2
 ; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT:    v_pk_max_f16 v4, v6, v4
-; GFX11-SDAG-FAKE16-NEXT:    v_max_f16_e32 v7, v0, v2
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v8
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX11-SDAG-FAKE16-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v5, v3
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v16half:
@@ -1016,46 +1476,54 @@ define float @test_vector_reduce_fmaximum_v2float(<2 x float> %v) {
 ; GFX7-LABEL: test_vector_reduce_fmaximum_v2float:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX7-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fmaximum_v2float:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fmaximum_v2float:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fmaximum_v2float:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fmaximum_v2float:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_vector_reduce_fmaximum_v2float:
@@ -1076,63 +1544,83 @@ define float @test_vector_reduce_fmaximum_v3float(<3 x float> %v) {
 ; GFX7-LABEL: test_vector_reduce_fmaximum_v3float:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f32_e32 v3, v0, v1
-; GFX7-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fmaximum_v3float:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f32_e32 v3, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX8-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fmaximum_v3float:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f32_e32 v3, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fmaximum_v3float:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e32 v3, v0, v1
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v3, vcc_lo
-; GFX10-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc_lo
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fmaximum_v3float:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f32_e32 v3, v0, v1
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v3, vcc_lo
-; GFX11-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_vector_reduce_fmaximum_v3float:
@@ -1153,80 +1641,111 @@ define float @test_vector_reduce_fmaximum_v4float(<4 x float> %v) {
 ; GFX7-LABEL: test_vector_reduce_fmaximum_v4float:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f32_e32 v4, v0, v1
-; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v3
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fmaximum_v4float:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f32_e32 v4, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX8-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v1, vcc
-; GFX8-NEXT:    v_max_f32_e32 v1, v0, v3
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fmaximum_v4float:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f32_e32 v4, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v1, vcc
-; GFX9-NEXT:    v_max_f32_e32 v1, v0, v3
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fmaximum_v4float:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e32 v4, v0, v1
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
-; GFX10-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_max_f32_e32 v1, v0, v3
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc_lo
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fmaximum_v4float:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f32_e32 v4, v0, v1
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
-; GFX11-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_max_f32_e32 v1, v0, v3
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc_lo
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_vector_reduce_fmaximum_v4float:
@@ -1249,148 +1768,225 @@ define float @test_vector_reduce_fmaximum_v8float(<8 x float> %v) {
 ; GFX7-LABEL: test_vector_reduce_fmaximum_v8float:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f32_e32 v8, v0, v1
-; GFX7-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v3
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v4
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v5
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v5
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v6
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v6
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v7
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v7
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fmaximum_v8float:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f32_e32 v8, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX8-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX8-NEXT:    v_max_f32_e32 v1, v0, v3
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX8-NEXT:    v_max_f32_e32 v1, v0, v4
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX8-NEXT:    v_max_f32_e32 v1, v0, v5
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v5
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX8-NEXT:    v_max_f32_e32 v1, v0, v6
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v6
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX8-NEXT:    v_max_f32_e32 v1, v0, v7
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v7
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fmaximum_v8float:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f32_e32 v8, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX9-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX9-NEXT:    v_max_f32_e32 v1, v0, v3
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX9-NEXT:    v_max_f32_e32 v1, v0, v4
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX9-NEXT:    v_max_f32_e32 v1, v0, v5
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v5
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX9-NEXT:    v_max_f32_e32 v1, v0, v6
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v6
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX9-NEXT:    v_max_f32_e32 v1, v0, v7
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v7
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fmaximum_v8float:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e32 v8, v0, v1
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
-; GFX10-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_max_f32_e32 v1, v0, v3
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_max_f32_e32 v1, v0, v4
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_max_f32_e32 v1, v0, v5
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v5
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_max_f32_e32 v1, v0, v6
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v6
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_max_f32_e32 v1, v0, v7
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v7
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc_lo
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fmaximum_v8float:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f32_e32 v8, v0, v1
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
-; GFX11-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_max_f32_e32 v1, v0, v3
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v3
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v6, v6
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v7, v7
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_max_f32_e32 v1, v0, v4
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_max_f32_e32 v1, v0, v5
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_max_f32_e32 v1, v0, v6
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v6
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_max_f32_e32 v1, v0, v7
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v7
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc_lo
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_vector_reduce_fmaximum_v8float:
@@ -1416,284 +2012,453 @@ define float @test_vector_reduce_fmaximum_v16float(<16 x float> %v) {
 ; GFX7-LABEL: test_vector_reduce_fmaximum_v16float:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f32_e32 v16, v0, v1
-; GFX7-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v3
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v4
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v5
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v5
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v6
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v6
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v7
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v7
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v8
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v8
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v9
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v9
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v10
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v10
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v11
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v11
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v12
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v12
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v13
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v13
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v14
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v14
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_max_f32_e32 v1, v0, v15
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v15
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v8, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v10, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v11, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v12, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v13, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v14, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v15, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v15, v0, vcc
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fmaximum_v16float:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f32_e32 v16, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX8-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f32_e32 v1, v0, v3
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f32_e32 v1, v0, v4
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f32_e32 v1, v0, v5
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v5
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f32_e32 v1, v0, v6
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v6
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f32_e32 v1, v0, v7
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v7
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f32_e32 v1, v0, v8
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v8
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f32_e32 v1, v0, v9
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v9
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f32_e32 v1, v0, v10
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v10
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f32_e32 v1, v0, v11
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v11
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f32_e32 v1, v0, v12
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v12
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f32_e32 v1, v0, v13
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v13
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f32_e32 v1, v0, v14
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v14
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_max_f32_e32 v1, v0, v15
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v15
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v8, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v10, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v11, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v12, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v13, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v14, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v15, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v15, v0, vcc
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fmaximum_v16float:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f32_e32 v16, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX9-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_max_f32_e32 v1, v0, v3
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_max_f32_e32 v1, v0, v4
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_max_f32_e32 v1, v0, v5
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v5
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_max_f32_e32 v1, v0, v6
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v6
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_max_f32_e32 v1, v0, v7
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v7
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_max_f32_e32 v1, v0, v8
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v8
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_max_f32_e32 v1, v0, v9
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v9
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_max_f32_e32 v1, v0, v10
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v10
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_max_f32_e32 v1, v0, v11
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v11
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_max_f32_e32 v1, v0, v12
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v12
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_max_f32_e32 v1, v0, v13
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v13
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_max_f32_e32 v1, v0, v14
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v14
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_max_f32_e32 v1, v0, v15
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v15
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v9, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v10, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v11, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v12, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v13, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v14, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v15, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v15, v0, vcc
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fmaximum_v16float:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e32 v16, v0, v1
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v16, vcc_lo
-; GFX10-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_max_f32_e32 v1, v0, v3
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_max_f32_e32 v1, v0, v4
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_max_f32_e32 v1, v0, v5
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v5
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_max_f32_e32 v1, v0, v6
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v6
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_max_f32_e32 v1, v0, v7
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v7
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_max_f32_e32 v1, v0, v8
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v8
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_max_f32_e32 v1, v0, v9
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v9
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_max_f32_e32 v1, v0, v10
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v10
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_max_f32_e32 v1, v0, v11
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v11
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_max_f32_e32 v1, v0, v12
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v12
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_max_f32_e32 v1, v0, v13
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v13
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_max_f32_e32 v1, v0, v14
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v14
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_max_f32_e32 v1, v0, v15
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v15
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v8, v8
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v8, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v9, v9
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v9, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v10, v10
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v10, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v11, v11
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v11, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v12, v12
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v12, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v13, v13
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v13, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v14, v14
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v14, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v15, v15
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v15, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v15, v0, vcc_lo
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fmaximum_v16float:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f32_e32 v16, v0, v1
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v16, vcc_lo
-; GFX11-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_max_f32_e32 v1, v0, v3
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_max_f32_e32 v1, v0, v4
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_max_f32_e32 v1, v0, v5
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v5
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_max_f32_e32 v1, v0, v6
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v6
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_max_f32_e32 v1, v0, v7
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v7
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v6, v6
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v7, v7
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_max_f32_e32 v1, v0, v8
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v8
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v8, v8
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v8, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v9, v9
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_max_f32_e32 v1, v0, v9
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v9
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v9, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v10, v10
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v10, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v11, v11
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_max_f32_e32 v1, v0, v10
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v10
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v11, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v12, v12
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v12, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v13, v13
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_max_f32_e32 v1, v0, v11
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v11
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v13, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v14, v14
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v14, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v15, v15
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_max_f32_e32 v1, v0, v12
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v12
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_max_f32_e32 v1, v0, v13
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v13
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_max_f32_e32 v1, v0, v14
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v14
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_max_f32_e32 v1, v0, v15
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v15
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v15, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v15, v0, vcc_lo
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_vector_reduce_fmaximum_v16float:
@@ -1726,51 +2491,62 @@ define double @test_vector_reduce_fmaximum_v2double(<2 x double> %v) {
 ; GFX7-LABEL: test_vector_reduce_fmaximum_v2double:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX7-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fmaximum_v2double:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fmaximum_v2double:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fmaximum_v2double:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fmaximum_v2double:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_vector_reduce_fmaximum_v2double:
@@ -1791,74 +2567,99 @@ define double @test_vector_reduce_fmaximum_v3double(<3 x double> %v) {
 ; GFX7-LABEL: test_vector_reduce_fmaximum_v3double:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX7-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fmaximum_v3double:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fmaximum_v3double:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fmaximum_v3double:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v7, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc_lo
-; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc_lo
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fmaximum_v3double:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v7, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v5 :: v_dual_cndmask_b32 v0, v0, v4
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v5, v1 :: v_dual_cndmask_b32 v2, v4, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_vector_reduce_fmaximum_v3double:
@@ -1881,96 +2682,135 @@ define double @test_vector_reduce_fmaximum_v4double(<4 x double> %v) {
 ; GFX7-LABEL: test_vector_reduce_fmaximum_v4double:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f64 v[8:9], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX7-NEXT:    v_mov_b32_e32 v10, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v10, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v6, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fmaximum_v4double:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f64 v[8:9], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-NEXT:    v_mov_b32_e32 v10, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v10, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v6, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fmaximum_v4double:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f64 v[8:9], v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v10, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v9, v10, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v6, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fmaximum_v4double:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f64 v[8:9], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc_lo
-; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[6:7], v[6:7]
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v6, v0, vcc_lo
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fmaximum_v4double:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f64 v[8:9], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v5 :: v_dual_cndmask_b32 v0, v0, v4
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v5, v1 :: v_dual_cndmask_b32 v2, v4, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[6:7], v[6:7]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v7 :: v_dual_cndmask_b32 v0, v0, v6
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v7, v1 :: v_dual_cndmask_b32 v2, v6, v0
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_vector_reduce_fmaximum_v4double:
@@ -1994,186 +2834,281 @@ define double @test_vector_reduce_fmaximum_v8double(<8 x double> %v) {
 ; GFX7-LABEL: test_vector_reduce_fmaximum_v8double:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f64 v[16:17], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX7-NEXT:    v_mov_b32_e32 v18, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v17, v18, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[8:9]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[10:11]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[12:13]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[14:15]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v6, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v9, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v8, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v11, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v13, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v12, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v15, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v14, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fmaximum_v8double:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f64 v[16:17], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-NEXT:    v_mov_b32_e32 v18, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v17, v18, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[8:9]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[10:11]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[12:13]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[14:15]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v6, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v9, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v8, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v11, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v13, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v12, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v15, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v14, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fmaximum_v8double:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f64 v[16:17], v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v18, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v17, v18, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[8:9]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[10:11]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[12:13]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[14:15]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v6, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v9, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v8, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v11, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v13, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v12, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v15, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v14, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fmaximum_v8double:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f64 v[16:17], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc_lo
-; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[8:9]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[10:11]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[10:11]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[12:13]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[12:13]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[14:15]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[14:15]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[6:7], v[6:7]
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v6, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[8:9], v[8:9]
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v9, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v8, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[10:11], v[10:11]
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v11, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[12:13], v[12:13]
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v13, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v12, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[14:15], v[14:15]
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v15, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v14, v0, vcc_lo
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fmaximum_v8double:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f64 v[16:17], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[8:9]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[10:11]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[10:11]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[12:13]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[12:13]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[14:15]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[14:15]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v5 :: v_dual_cndmask_b32 v0, v0, v4
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v5, v1 :: v_dual_cndmask_b32 v2, v4, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[6:7], v[6:7]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v7 :: v_dual_cndmask_b32 v0, v0, v6
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v7, v1 :: v_dual_cndmask_b32 v2, v6, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[8:9], v[8:9]
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v9 :: v_dual_cndmask_b32 v0, v0, v8
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v9, v1 :: v_dual_cndmask_b32 v2, v8, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[10:11], v[10:11]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v11 :: v_dual_cndmask_b32 v0, v0, v10
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v11, v1 :: v_dual_cndmask_b32 v2, v10, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[12:13], v[12:13]
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v13 :: v_dual_cndmask_b32 v0, v0, v12
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v13, v1 :: v_dual_cndmask_b32 v2, v12, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[14:15], v[14:15]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v15 :: v_dual_cndmask_b32 v0, v0, v14
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v15, v1 :: v_dual_cndmask_b32 v2, v14, v0
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_vector_reduce_fmaximum_v8double:
@@ -2203,376 +3138,583 @@ define double @test_vector_reduce_fmaximum_v16double(<16 x double> %v) {
 ; GFX7-LABEL: test_vector_reduce_fmaximum_v16double:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f64 v[31:32], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX7-NEXT:    v_mov_b32_e32 v33, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v32, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v31, 0, vcc
-; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[8:9]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[10:11]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[12:13]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[14:15]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[16:17]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[18:19]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[18:19]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[20:21]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[20:21]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[22:23]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[22:23]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[24:25]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[24:25]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[26:27]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[26:27]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[28:29]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[28:29]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v6, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v9, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v8, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v11, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v13, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v12, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v15, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v14, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[16:17]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v17, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v16, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[18:19]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v19, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v18, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v19, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v18, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[20:21]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v21, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v20, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v21, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v20, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[22:23]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v23, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v22, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v23, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v22, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[24:25]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v25, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v24, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v25, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v24, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[26:27]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v27, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v26, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v27, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v26, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[28:29]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v29, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v28, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v29, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v28, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[30:31]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[30:31]
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[30:31], v[30:31]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v30, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v31, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v31, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v30, v0, vcc
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fmaximum_v16double:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f64 v[31:32], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-NEXT:    v_mov_b32_e32 v33, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v32, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v31, 0, vcc
-; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[8:9]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[10:11]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[12:13]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[14:15]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[16:17]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[18:19]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[18:19]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[20:21]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[20:21]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[22:23]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[22:23]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[24:25]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[24:25]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[26:27]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[26:27]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[28:29]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[28:29]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v6, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v9, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v8, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v11, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v13, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v12, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v15, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v14, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[16:17]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v17, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v16, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[18:19]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v19, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v18, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v19, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v18, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[20:21]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v21, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v20, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v21, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v20, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[22:23]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v23, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v22, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v23, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v22, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[24:25]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v25, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v24, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v25, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v24, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[26:27]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v27, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v26, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v27, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v26, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[28:29]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v29, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v28, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v29, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v28, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[30:31]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[30:31]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[30:31], v[30:31]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v30, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v31, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v31, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v30, v0, vcc
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fmaximum_v16double:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    scratch_load_dword v31, off, s32
-; GFX9-NEXT:    v_max_f64 v[32:33], v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v34, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v33, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[8:9]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[10:11]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[12:13]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[14:15]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[16:17]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[18:19]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[18:19]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[20:21]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[20:21]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[22:23]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[22:23]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[24:25]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[24:25]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[26:27]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[26:27]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[28:29]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[28:29]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v6, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v9, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v8, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v11, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v13, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v12, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v15, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v14, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[16:17]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v17, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v16, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[18:19]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v19, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v18, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v19, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v18, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[20:21]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v21, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v20, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v21, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v20, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[22:23]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v23, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v22, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v23, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v22, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[24:25]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v25, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v24, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v25, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v24, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[26:27]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v27, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v26, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v27, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v26, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[28:29]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v29, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v28, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v29, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v28, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[30:31]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[30:31]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[30:31], v[30:31]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v30, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v31, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v31, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v30, v0, vcc
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fmaximum_v16double:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f64 v[31:32], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v32, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v31, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
 ; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[8:9]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[10:11]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[10:11]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[12:13]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[12:13]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[14:15]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[14:15]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[16:17]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[18:19]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[18:19]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[20:21]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[20:21]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[22:23]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[22:23]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[24:25]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[24:25]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[26:27]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[26:27]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[28:29]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[28:29]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[6:7], v[6:7]
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v6, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[8:9], v[8:9]
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v9, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v8, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[10:11], v[10:11]
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v11, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[12:13], v[12:13]
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v13, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v12, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[14:15], v[14:15]
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v15, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v14, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[16:17], v[16:17]
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v17, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v16, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[18:19], v[18:19]
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v19, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v18, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v19, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v18, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[20:21], v[20:21]
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v21, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v20, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v21, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v20, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[22:23], v[22:23]
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v23, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v22, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v23, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v22, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[24:25], v[24:25]
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v25, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v24, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v25, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v24, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[26:27], v[26:27]
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v27, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v26, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v27, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v26, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[28:29], v[28:29]
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v29, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v28, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v29, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v28, v0, vcc_lo
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[30:31]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[30:31]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[30:31], v[30:31]
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v30, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v31, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v31, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v30, v0, vcc_lo
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fmaximum_v16double:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f64 v[31:32], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v32, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v31, 0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
 ; GFX11-NEXT:    scratch_load_b32 v31, off, s32
-; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[8:9]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[10:11]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[10:11]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[12:13]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[12:13]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[14:15]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[14:15]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[16:17]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[18:19]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[18:19]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[20:21]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[20:21]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[22:23]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[22:23]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[24:25]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[24:25]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[26:27]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[26:27]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[28:29]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[28:29]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v5 :: v_dual_cndmask_b32 v0, v0, v4
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v5, v1 :: v_dual_cndmask_b32 v2, v4, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[6:7], v[6:7]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v7 :: v_dual_cndmask_b32 v0, v0, v6
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v7, v1 :: v_dual_cndmask_b32 v2, v6, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[8:9], v[8:9]
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v9 :: v_dual_cndmask_b32 v0, v0, v8
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v9, v1 :: v_dual_cndmask_b32 v2, v8, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[10:11], v[10:11]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v11 :: v_dual_cndmask_b32 v0, v0, v10
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v11, v1 :: v_dual_cndmask_b32 v2, v10, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[12:13], v[12:13]
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v13 :: v_dual_cndmask_b32 v0, v0, v12
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v13, v1 :: v_dual_cndmask_b32 v2, v12, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[14:15], v[14:15]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v15 :: v_dual_cndmask_b32 v0, v0, v14
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v15, v1 :: v_dual_cndmask_b32 v2, v14, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[16:17], v[16:17]
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v17 :: v_dual_cndmask_b32 v0, v0, v16
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v17, v1 :: v_dual_cndmask_b32 v2, v16, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[18:19], v[18:19]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v19 :: v_dual_cndmask_b32 v0, v0, v18
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v19, v1 :: v_dual_cndmask_b32 v2, v18, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[20:21], v[20:21]
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v21 :: v_dual_cndmask_b32 v0, v0, v20
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v21, v1 :: v_dual_cndmask_b32 v2, v20, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[22:23], v[22:23]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v23 :: v_dual_cndmask_b32 v0, v0, v22
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v23, v1 :: v_dual_cndmask_b32 v2, v22, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[24:25], v[24:25]
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v25 :: v_dual_cndmask_b32 v0, v0, v24
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v25, v1 :: v_dual_cndmask_b32 v2, v24, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[26:27], v[26:27]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v27 :: v_dual_cndmask_b32 v0, v0, v26
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v27, v1 :: v_dual_cndmask_b32 v2, v26, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[28:29], v[28:29]
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v29 :: v_dual_cndmask_b32 v0, v0, v28
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v29, v1 :: v_dual_cndmask_b32 v2, v28, v0
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[30:31]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[30:31]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[30:31], v[30:31]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v30 :: v_dual_cndmask_b32 v1, v1, v31
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v31, v1 :: v_dual_cndmask_b32 v2, v30, v0
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_vector_reduce_fmaximum_v16double:
diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-fminimum.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-fminimum.ll
index 9d3a4f387bfc8..300efa6d7fc7f 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-reduce-fminimum.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-fminimum.ll
@@ -16,59 +16,72 @@ define half @test_vector_reduce_fminimum_v2half(<2 x half> %v) {
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT:    v_min_f32_e32 v3, v0, v1
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fminimum_v2half:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX8-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fminimum_v2half:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f16_sdwa v1, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fminimum_v2half:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f16_sdwa v1, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v0 src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v1, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v2half:
 ; GFX11-SDAG-TRUE16:       ; %bb.0: ; %entry
 ; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v0.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v0.l, vcc_lo
 ; GFX11-SDAG-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v0.h
-; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
 ; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_fminimum_v2half:
 ; GFX11-SDAG-FAKE16:       ; %bb.0: ; %entry
 ; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v2half:
@@ -126,81 +139,108 @@ define half @test_vector_reduce_fminimum_v3half(<3 x half> %v) {
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT:    v_min_f32_e32 v3, v0, v1
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fminimum_v3half:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
-; GFX8-NEXT:    v_min_f16_e32 v3, v0, v2
-; GFX8-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX8-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fminimum_v3half:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f16_sdwa v2, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fminimum_v3half:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f16_sdwa v2, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v0 src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX10-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v3half:
 ; GFX11-SDAG-TRUE16:       ; %bb.0: ; %entry
 ; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v0.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.l, v1.l
 ; GFX11-SDAG-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v0.h
 ; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
-; GFX11-SDAG-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.l, v0.l, vcc_lo
 ; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v0.h
 ; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_fminimum_v3half:
 ; GFX11-SDAG-FAKE16:       ; %bb.0: ; %entry
 ; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT:    v_min_f16_e32 v3, v0, v2
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v3, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v3half:
@@ -274,101 +314,144 @@ define half @test_vector_reduce_fminimum_v4half(<4 x half> %v) {
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX7-NEXT:    v_min_f32_e32 v4, v0, v1
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v3
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fminimum_v4half:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT:    v_min_f16_e32 v4, v0, v3
-; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX8-NEXT:    v_min_f16_e32 v3, v0, v1
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v3
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX8-NEXT:    v_min_f16_e32 v1, v0, v2
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fminimum_v4half:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f16_sdwa v2, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GFX9-NEXT:    v_min_f16_e32 v0, v0, v3
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT:    v_min_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX9-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fminimum_v4half:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f16_sdwa v2, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v0 src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX10-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX10-NEXT:    v_min_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v1 src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX10-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v4half:
 ; GFX11-SDAG-TRUE16:       ; %bb.0: ; %entry
 ; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v0.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.l, v1.l
 ; GFX11-SDAG-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v0.h
 ; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
-; GFX11-SDAG-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v1.l
-; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.h
-; GFX11-SDAG-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v1.h
-; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.l, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.h, v1.h
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.h, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v0.h
 ; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_fminimum_v4half:
 ; GFX11-SDAG-FAKE16:       ; %bb.0: ; %entry
 ; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT:    v_min_f16_e32 v3, v0, v2
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v3, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT:    v_min_f16_e32 v1, v0, v3
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v3
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v4half:
@@ -439,190 +522,313 @@ define half @test_vector_reduce_fminimum_v8half(<8 x half> %v) {
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT:    v_min_f32_e32 v8, v0, v1
-; GFX7-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v3
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v6, v6
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v4
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v5
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v5
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v6
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v6
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v7
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v7
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fminimum_v8half:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 16, v0
-; GFX8-NEXT:    v_min_f16_e32 v8, v0, v7
-; GFX8-NEXT:    v_mov_b32_e32 v9, 0x7e00
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v7
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX8-NEXT:    v_min_f16_e32 v7, v0, v1
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v7
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v7, vcc
-; GFX8-NEXT:    v_min_f16_e32 v1, v0, v6
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v6
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX8-NEXT:    v_min_f16_e32 v1, v0, v2
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX8-NEXT:    v_min_f16_e32 v1, v0, v5
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v5
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX8-NEXT:    v_min_f16_e32 v1, v0, v3
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX8-NEXT:    v_min_f16_e32 v1, v0, v4
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v4
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fminimum_v8half:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v4, v0, v2
-; GFX9-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cndmask_b32_sdwa v5, v0, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT:    v_min_f16_e32 v2, v6, v0
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v6, v0
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_perm_b32 v2, v4, v2, s0
+; GFX9-NEXT:    v_perm_b32 v0, v5, v0, s0
+; GFX9-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX9-NEXT:    v_pk_min_f16 v2, v1, v3
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
-; GFX9-NEXT:    v_min_f16_e32 v6, v0, v4
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v4
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v6, vcc
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_sdwa v1, v5, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_perm_b32 v3, v4, v3, s0
+; GFX9-NEXT:    v_perm_b32 v1, v2, v1, s0
+; GFX9-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc
+; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX9-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fminimum_v8half:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_pk_min_f16 v4, v0, v2
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
-; GFX10-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v4, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_pk_min_f16 v2, v1, v3
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v5, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX10-NEXT:    v_min_f16_e32 v5, v4, v0
-; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v2, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v0
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v5, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_mov_b32_e32 v1, 0x7e00
-; GFX10-NEXT:    v_min_f16_e32 v4, v0, v6
-; GFX10-NEXT:    v_cmp_o_f16_e64 s4, v0, v6
-; GFX10-NEXT:    v_cndmask_b32_sdwa v1, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0x7e00, v4, s4
-; GFX10-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, v2, v2
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, v0, v2, s4
+; GFX10-NEXT:    v_cndmask_b32_sdwa v0, v0, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_perm_b32 v0, v0, v5, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
+; GFX10-NEXT:    v_perm_b32 v2, v4, v2, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v5, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_perm_b32 v2, v4, v3, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX10-NEXT:    v_pk_min_f16 v1, v1, v2
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v5, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_min_f16_e32 v0, v0, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX10-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v8half:
 ; GFX11-SDAG-TRUE16:       ; %bb.0: ; %entry
 ; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v2.l
-; GFX11-SDAG-TRUE16-NEXT:    v_pk_min_f16 v4, v0, v2
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v0.h, v2.h
-; GFX11-SDAG-TRUE16-NEXT:    v_pk_min_f16 v2, v1, v3
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2.h, v2.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v2.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v2.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v2.l, s0
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.h, v2.h, v0.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.l, v2.l, v0.l, s0
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3.h, v3.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v3.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v1.h, v1.h, v3.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, v3.l, s0
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v1.h, v1.h
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v1.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v0.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.h, v3.h, v1.h, s0
 ; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v4.l, vcc_lo
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v4.h, s0
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1.l, v3.l
-; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.l, v3.l, v1.l, s1
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT:    v_pk_min_f16 v1, v1, v2
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-SDAG-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v0.h
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v2.l, vcc_lo
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1.h, v3.h
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, s0
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.l, vcc_lo
 ; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.l, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.h, v1.h
 ; GFX11-SDAG-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v0.h
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v2.h, vcc_lo
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, s0
-; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.h, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-SDAG-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v0.h
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
 ; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_fminimum_v8half:
 ; GFX11-SDAG-FAKE16:       ; %bb.0: ; %entry
 ; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-FAKE16-NEXT:    v_pk_min_f16 v4, v0, v2
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
-; GFX11-SDAG-FAKE16-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v0, v5, v0, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v2, v4, v2, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX11-SDAG-FAKE16-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v5, v4, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v2, v4, v3, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_pk_min_f16 v1, v1, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v5, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_min_f16_e32 v0, v0, v3
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc_lo
 ; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX11-SDAG-FAKE16-NEXT:    v_min_f16_e32 v5, v0, v2
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v4, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v3
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v5, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT:    v_min_f16_e32 v3, v0, v6
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v6
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v3, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v2
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v8half:
@@ -711,381 +917,635 @@ define half @test_vector_reduce_fminimum_v16half(<16 x half> %v) {
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT:    v_min_f32_e32 v16, v0, v1
-; GFX7-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v3
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v8, v8
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v6, v6
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v4
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v9, v9
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v8, v8
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v5
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v5
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v10, v10
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v9, v9
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v8, v8
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v6
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v6
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v11, v11
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v10, v10
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v9, v9
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v7
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v7
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v12, v12
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v8, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v11, v11
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v10, v10
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v8
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v8
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v13, v13
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v12, v12
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v11, v11
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v9
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v9
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v14, v14
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v10, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v13, v13
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v12, v12
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v10
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v10
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v15, v15
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v11, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v14, v14
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v13, v13
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v11
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v11
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v12, v0, vcc
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v15, v15
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v14, v14
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v12
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v12
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v13, v0, vcc
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v15, v15
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v13
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v13
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v14
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v14
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v15
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v15
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v14, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v15, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v15, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fminimum_v16half:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v15, 16, v0
-; GFX8-NEXT:    v_min_f16_e32 v16, v0, v15
-; GFX8-NEXT:    v_mov_b32_e32 v17, 0x7e00
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v15
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX8-NEXT:    v_min_f16_e32 v15, v0, v1
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v15, v15
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v15, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v15, v15, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v15
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v14, 16, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v15, vcc
-; GFX8-NEXT:    v_min_f16_e32 v1, v0, v14
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v14
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f16_e32 v1, v0, v2
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v14, v14
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v14, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v13, 16, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f16_e32 v1, v0, v13
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v13
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f16_e32 v1, v0, v3
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v13, v13
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v13, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 16, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f16_e32 v1, v0, v12
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v12
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f16_e32 v1, v0, v4
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v12, v12
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v12, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v11, 16, v4
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f16_e32 v1, v0, v11
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v11
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f16_e32 v1, v0, v5
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v11, v11
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v11, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 16, v5
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f16_e32 v1, v0, v10
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v10
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f16_e32 v1, v0, v6
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v6
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v10, v10
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v10, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 16, v6
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f16_e32 v1, v0, v9
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v9
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f16_e32 v1, v0, v7
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v7
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 16, v7
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f16_e32 v1, v0, v8
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v8
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v8, v0, vcc
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fminimum_v16half:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v8, v2, v6
-; GFX9-NEXT:    v_mov_b32_e32 v9, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v2, v6
+; GFX9-NEXT:    v_lshrrev_b32_e32 v9, 16, v4
+; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 16, v0
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v9, v9
 ; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
 ; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v10, v9, v8, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 16, v8
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v2, v9, v8, vcc
-; GFX9-NEXT:    v_pk_min_f16 v8, v0, v4
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v4
-; GFX9-NEXT:    v_perm_b32 v6, v2, v10, s0
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v11, v9, v8, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 16, v8
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX9-NEXT:    v_perm_b32 v4, v0, v11, s0
-; GFX9-NEXT:    v_pk_min_f16 v4, v4, v6
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v11, v10
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v6, v9, v4, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v4, vcc
-; GFX9-NEXT:    v_min_f16_e32 v2, v6, v0
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v6, v0
-; GFX9-NEXT:    v_pk_min_f16 v6, v1, v5
+; GFX9-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX9-NEXT:    v_perm_b32 v4, v9, v4, s0
+; GFX9-NEXT:    v_perm_b32 v0, v8, v0, s0
+; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
+; GFX9-NEXT:    v_pk_min_f16 v0, v0, v4
+; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v8, v8
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v8, v8, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v6, v6
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v6, v6, v2, vcc
+; GFX9-NEXT:    v_perm_b32 v6, v8, v6, s0
+; GFX9-NEXT:    v_perm_b32 v2, v4, v2, s0
+; GFX9-NEXT:    v_pk_min_f16 v2, v2, v6
 ; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v2, vcc
-; GFX9-NEXT:    v_pk_min_f16 v2, v3, v7
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v3, v7
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v4, v9, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cndmask_b32_e32 v6, v2, v4, vcc
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_perm_b32 v2, v2, v6, s0
+; GFX9-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX9-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v5
+; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v5, v5
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
+; GFX9-NEXT:    v_perm_b32 v4, v4, v5, s0
+; GFX9-NEXT:    v_perm_b32 v1, v2, v1, s0
+; GFX9-NEXT:    v_pk_min_f16 v1, v1, v4
+; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v7
+; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v3
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v4, v4
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v5
-; GFX9-NEXT:    v_perm_b32 v3, v2, v4, s0
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v7, v7
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX9-NEXT:    v_perm_b32 v2, v2, v3, s0
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cndmask_b32_e32 v5, v7, v3, vcc
+; GFX9-NEXT:    v_perm_b32 v4, v4, v5, s0
+; GFX9-NEXT:    v_pk_min_f16 v2, v2, v4
 ; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v7, v9, v6, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v9, v6, vcc
-; GFX9-NEXT:    v_perm_b32 v5, v1, v7, s0
-; GFX9-NEXT:    v_pk_min_f16 v3, v5, v3
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v7, v4
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v1, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v3, v3
+; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v3, vcc
+; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v2, v2
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v4, v9, v3, vcc
-; GFX9-NEXT:    v_min_f16_e32 v5, v0, v4
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v4
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v5, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v1, vcc
+; GFX9-NEXT:    v_perm_b32 v2, v2, v4, s0
+; GFX9-NEXT:    v_perm_b32 v1, v1, v3, s0
+; GFX9-NEXT:    v_pk_min_f16 v1, v1, v2
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_sdwa v1, v9, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc
+; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX9-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v1, v1
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fminimum_v16half:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_pk_min_f16 v8, v2, v6
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v6
-; GFX10-NEXT:    v_pk_min_f16 v9, v0, v4
-; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 16, v8
-; GFX10-NEXT:    v_cndmask_b32_e32 v8, 0x7e00, v8, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_lshrrev_b32_e32 v11, 16, v9
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v10, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v4
-; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v9, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_pk_min_f16 v9, v3, v7
-; GFX10-NEXT:    v_perm_b32 v4, v2, v8, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v11, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
-; GFX10-NEXT:    v_lshrrev_b32_e32 v12, 16, v9
-; GFX10-NEXT:    v_pk_min_f16 v11, v1, v5
-; GFX10-NEXT:    v_perm_b32 v10, v0, v6, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v9, 0x7e00, v9, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_pk_min_f16 v4, v10, v4
-; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 16, v11
-; GFX10-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v12, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v5
-; GFX10-NEXT:    v_cndmask_b32_e32 v7, 0x7e00, v11, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v10, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v8
-; GFX10-NEXT:    v_perm_b32 v6, v3, v9, 0x5040100
-; GFX10-NEXT:    v_perm_b32 v8, v1, v7, 0x5040100
-; GFX10-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v4, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX10-NEXT:    v_pk_min_f16 v2, v8, v6
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v5, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v7, v9
-; GFX10-NEXT:    v_min_f16_e32 v5, v4, v0
-; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v2, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v0
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v5, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX10-NEXT:    v_mov_b32_e32 v1, 0x7e00
-; GFX10-NEXT:    v_min_f16_e32 v4, v0, v6
-; GFX10-NEXT:    v_cmp_o_f16_e64 s4, v0, v6
-; GFX10-NEXT:    v_cndmask_b32_sdwa v1, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0x7e00, v4, s4
-; GFX10-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
+; GFX10-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
+; GFX10-NEXT:    v_lshrrev_b32_e32 v11, 16, v6
+; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8, v8
+; GFX10-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX10-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v11, v11
+; GFX10-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v10, v10
+; GFX10-NEXT:    v_perm_b32 v0, v9, v0, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v4, v8, v4, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v11, v11, v10, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_lshrrev_b32_e32 v8, 16, v1
+; GFX10-NEXT:    v_pk_min_f16 v0, v0, v4
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v2, vcc_lo
+; GFX10-NEXT:    v_perm_b32 v2, v10, v2, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 16, v3
+; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX10-NEXT:    v_perm_b32 v6, v11, v6, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v11, 16, v7
+; GFX10-NEXT:    v_pk_min_f16 v2, v2, v6
+; GFX10-NEXT:    v_lshrrev_b32_e32 v6, 16, v5
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_lshrrev_b32_e32 v9, 16, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_cndmask_b32_e32 v8, v8, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v9, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8, v8
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v11, v11
+; GFX10-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v10, v10
+; GFX10-NEXT:    v_perm_b32 v1, v8, v1, 0x5040100
+; GFX10-NEXT:    v_perm_b32 v5, v6, v5, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v11, v11, v10, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_pk_min_f16 v1, v1, v5
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_perm_b32 v3, v10, v3, 0x5040100
+; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
+; GFX10-NEXT:    v_perm_b32 v6, v11, v7, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_perm_b32 v0, v4, v0, 0x5040100
+; GFX10-NEXT:    v_pk_min_f16 v3, v3, v6
+; GFX10-NEXT:    v_cndmask_b32_e32 v9, v9, v4, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_perm_b32 v2, v9, v2, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v5, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX10-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v5, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc_lo
+; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX10-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v16half:
 ; GFX11-SDAG-TRUE16:       ; %bb.0: ; %entry
 ; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2.l, v6.l
-; GFX11-SDAG-TRUE16-NEXT:    v_pk_min_f16 v8, v2, v6
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v2.h, v6.h
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v0.l, v4.l
-; GFX11-SDAG-TRUE16-NEXT:    v_pk_min_f16 v2, v0, v4
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, v0.h, v4.h
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v8.l, vcc_lo
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v8.h, s0
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v3.l, v7.l
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.l, 0x7e00, v2.l, s1
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.h, 0x7e00, v2.h, s2
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v3.h, v7.h
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, v1.l, v5.l
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e64 s3, v1.h, v5.h
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2.l, v0.l
-; GFX11-SDAG-TRUE16-NEXT:    v_pk_min_f16 v4, v2, v0
-; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_3)
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v4.l, vcc_lo
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2.h, v0.h
-; GFX11-SDAG-TRUE16-NEXT:    v_pk_min_f16 v2, v3, v7
-; GFX11-SDAG-TRUE16-NEXT:    v_pk_min_f16 v3, v1, v5
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v4.h, vcc_lo
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v2.l, s0
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6.h, v6.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v6.l, v6.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v4.h, v4.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v4.l, v4.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.h, v2.h, v6.h, vcc_lo
 ; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x7e00, v2.h, s1
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.l, 0x7e00, v3.l, s2
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.h, 0x7e00, v3.h, s3
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v0.h
-; GFX11-SDAG-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.l, v2.l, v6.l, s0
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v4.h, s1
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v4.l, s2
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2.h, v2.h
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v2.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v0.h, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v6.h, v6.h, v2.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v6.l, v6.l, v2.l, s0
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5.h, v5.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v4.h, v4.h, v0.h, s1
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v4.l, v4.l, v0.l, s2
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v7.h, v7.h
+; GFX11-SDAG-TRUE16-NEXT:    v_pk_min_f16 v2, v2, v6
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v7.l, v7.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v5.l, v5.l
+; GFX11-SDAG-TRUE16-NEXT:    v_pk_min_f16 v0, v0, v4
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v1.h, v1.h, v5.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s3, v2.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v3.h, v3.h, v7.h, s1
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v3.l, v3.l, v7.l, s2
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2.h, v2.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, v5.l, s0
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v2.l, s3
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v3.h, v3.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v3.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v2.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.h, v1.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, v1.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v4.h, v7.h, v3.h, s0
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v4.l, v7.l, v3.l, s1
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v0.h, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v5.h, v5.h, v1.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v5.l, v5.l, v1.l, s2
+; GFX11-SDAG-TRUE16-NEXT:    v_pk_min_f16 v3, v3, v4
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.l, v2.l, v0.l, s0
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.h, v2.h, v0.h, s1
 ; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v2.l, v1.l
-; GFX11-SDAG-TRUE16-NEXT:    v_pk_min_f16 v3, v2, v1
+; GFX11-SDAG-TRUE16-NEXT:    v_pk_min_f16 v1, v1, v5
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v3.h, v3.h
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, v3.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v1.h, v1.h, v3.h, s0
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.h, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, v1.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e64 s1, v1.h, v1.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v0.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.l, v3.l, v1.l, s0
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v2.h, v3.h, v1.h, s1
 ; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v3.l, s0
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v2.h, v1.h
-; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_pk_min_f16 v1, v1, v2
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.l, v1.l
 ; GFX11-SDAG-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v0.h
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v3.h, s0
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.l, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.h, v1.h
 ; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
-; GFX11-SDAG-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v0.h
 ; GFX11-SDAG-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v0.h
-; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.h, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.h, v0.l, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v0.h
 ; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_fminimum_v16half:
 ; GFX11-SDAG-FAKE16:       ; %bb.0: ; %entry
 ; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-FAKE16-NEXT:    v_pk_min_f16 v8, v2, v6
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v6
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX11-SDAG-FAKE16-NEXT:    v_pk_min_f16 v10, v0, v4
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v4
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v9, 0x7e00, v8, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v8
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v6
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v12, 16, v0
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v13, 16, v10
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v14, 16, v1
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v8, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v4
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v6, v2, v9, 0x5040100
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v10, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v12, v11
-; GFX11-SDAG-FAKE16-NEXT:    v_pk_min_f16 v10, v3, v7
-; GFX11-SDAG-FAKE16-NEXT:    v_pk_min_f16 v12, v1, v5
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v13, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v13, 16, v5
-; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v8, v4, v0, 0x5040100
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v15, 16, v12
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4)
-; GFX11-SDAG-FAKE16-NEXT:    v_pk_min_f16 v6, v8, v6
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v8, 0x7e00, v10, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v10
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
 ; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v6
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v10, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v5
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v12, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v14, v13
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v5, 0x7e00, v15, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v9
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v6, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v2
-; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v4, v3, v8, 0x5040100
-; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v6, v5, v1, 0x5040100
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v11, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8, v8
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v9, v9, v8, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v11, v11
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v10, v10
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v0, v9, v0, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v11, v11, v10, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v4, v8, v4, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v6, v6, v2, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v2, v10, v2, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-SDAG-FAKE16-NEXT:    v_pk_min_f16 v0, v0, v4
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v3
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v6, v11, v6, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v7
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_pk_min_f16 v2, v2, v6
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v5
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v6, v6
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v8, v8, v6, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v9, v9
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v4, v9, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v8, v8
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v11, v11
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v7, v7
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v10, v10
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v1, v8, v1, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v5, v6, v5, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v11, v11, v10, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX11-SDAG-FAKE16-NEXT:    v_pk_min_f16 v1, v1, v5
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v7, v7, v3, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v3, v10, v3, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v6, v11, v7, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-SDAG-FAKE16-NEXT:    v_pk_min_f16 v3, v3, v6
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v9, v9, v4, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v0, v4, v0, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v2, v9, v2, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-SDAG-FAKE16-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v5, v4, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v5, v5
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v5, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_min_f16_e32 v0, v0, v2
 ; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT:    v_pk_min_f16 v4, v6, v4
-; GFX11-SDAG-FAKE16-NEXT:    v_min_f16_e32 v7, v0, v2
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v8
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX11-SDAG-FAKE16-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v5, v3
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
-; GFX11-SDAG-FAKE16-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v16half:
@@ -1196,46 +1656,54 @@ define float @test_vector_reduce_fminimum_v2float(<2 x float> %v) {
 ; GFX7-LABEL: test_vector_reduce_fminimum_v2float:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX7-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fminimum_v2float:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fminimum_v2float:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fminimum_v2float:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fminimum_v2float:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_vector_reduce_fminimum_v2float:
@@ -1256,63 +1724,83 @@ define float @test_vector_reduce_fminimum_v3float(<3 x float> %v) {
 ; GFX7-LABEL: test_vector_reduce_fminimum_v3float:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f32_e32 v3, v0, v1
-; GFX7-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fminimum_v3float:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f32_e32 v3, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX8-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fminimum_v3float:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f32_e32 v3, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fminimum_v3float:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f32_e32 v3, v0, v1
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v3, vcc_lo
-; GFX10-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc_lo
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fminimum_v3float:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f32_e32 v3, v0, v1
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v3, vcc_lo
-; GFX11-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v3float:
@@ -1345,80 +1833,111 @@ define float @test_vector_reduce_fminimum_v4float(<4 x float> %v) {
 ; GFX7-LABEL: test_vector_reduce_fminimum_v4float:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f32_e32 v4, v0, v1
-; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v3
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fminimum_v4float:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f32_e32 v4, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX8-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v1, vcc
-; GFX8-NEXT:    v_min_f32_e32 v1, v0, v3
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fminimum_v4float:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f32_e32 v4, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v1, vcc
-; GFX9-NEXT:    v_min_f32_e32 v1, v0, v3
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fminimum_v4float:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f32_e32 v4, v0, v1
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
-; GFX10-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_min_f32_e32 v1, v0, v3
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc_lo
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fminimum_v4float:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f32_e32 v4, v0, v1
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
-; GFX11-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_min_f32_e32 v1, v0, v3
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc_lo
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v4float:
@@ -1454,148 +1973,225 @@ define float @test_vector_reduce_fminimum_v8float(<8 x float> %v) {
 ; GFX7-LABEL: test_vector_reduce_fminimum_v8float:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f32_e32 v8, v0, v1
-; GFX7-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v3
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v4
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v5
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v5
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v6
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v6
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v7
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v7
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fminimum_v8float:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f32_e32 v8, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX8-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX8-NEXT:    v_min_f32_e32 v1, v0, v3
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX8-NEXT:    v_min_f32_e32 v1, v0, v4
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX8-NEXT:    v_min_f32_e32 v1, v0, v5
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v5
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX8-NEXT:    v_min_f32_e32 v1, v0, v6
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v6
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX8-NEXT:    v_min_f32_e32 v1, v0, v7
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v7
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fminimum_v8float:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f32_e32 v8, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX9-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX9-NEXT:    v_min_f32_e32 v1, v0, v3
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX9-NEXT:    v_min_f32_e32 v1, v0, v4
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX9-NEXT:    v_min_f32_e32 v1, v0, v5
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v5
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX9-NEXT:    v_min_f32_e32 v1, v0, v6
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v6
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
-; GFX9-NEXT:    v_min_f32_e32 v1, v0, v7
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v7
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fminimum_v8float:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f32_e32 v8, v0, v1
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
-; GFX10-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_min_f32_e32 v1, v0, v3
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_min_f32_e32 v1, v0, v4
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_min_f32_e32 v1, v0, v5
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v5
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_min_f32_e32 v1, v0, v6
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v6
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_min_f32_e32 v1, v0, v7
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v7
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc_lo
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fminimum_v8float:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f32_e32 v8, v0, v1
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
-; GFX11-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_min_f32_e32 v1, v0, v3
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v3
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v6, v6
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v7, v7
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_min_f32_e32 v1, v0, v4
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_min_f32_e32 v1, v0, v5
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_min_f32_e32 v1, v0, v6
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v6
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_min_f32_e32 v1, v0, v7
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v7
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc_lo
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v8float:
@@ -1639,284 +2235,453 @@ define float @test_vector_reduce_fminimum_v16float(<16 x float> %v) {
 ; GFX7-LABEL: test_vector_reduce_fminimum_v16float:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f32_e32 v16, v0, v1
-; GFX7-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v3
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v4
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v5
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v5
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v6
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v6
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v7
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v7
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v8
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v8
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v9
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v9
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v10
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v10
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v11
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v11
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v12
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v12
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v13
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v13
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v14
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v14
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX7-NEXT:    v_min_f32_e32 v1, v0, v15
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v15
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v8, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v10, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v11, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v12, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v13, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v14, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v15, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v15, v0, vcc
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fminimum_v16float:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f32_e32 v16, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX8-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f32_e32 v1, v0, v3
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f32_e32 v1, v0, v4
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f32_e32 v1, v0, v5
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v5
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f32_e32 v1, v0, v6
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v6
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f32_e32 v1, v0, v7
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v7
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f32_e32 v1, v0, v8
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v8
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f32_e32 v1, v0, v9
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v9
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f32_e32 v1, v0, v10
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v10
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f32_e32 v1, v0, v11
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v11
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f32_e32 v1, v0, v12
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v12
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f32_e32 v1, v0, v13
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v13
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f32_e32 v1, v0, v14
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v14
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX8-NEXT:    v_min_f32_e32 v1, v0, v15
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v15
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v8, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v10, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v11, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v12, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v13, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v14, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v15, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v15, v0, vcc
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fminimum_v16float:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f32_e32 v16, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX9-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_min_f32_e32 v1, v0, v3
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_min_f32_e32 v1, v0, v4
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_min_f32_e32 v1, v0, v5
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v5
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_min_f32_e32 v1, v0, v6
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v6
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_min_f32_e32 v1, v0, v7
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v7
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_min_f32_e32 v1, v0, v8
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v8
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_min_f32_e32 v1, v0, v9
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v9
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_min_f32_e32 v1, v0, v10
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v10
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_min_f32_e32 v1, v0, v11
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v11
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_min_f32_e32 v1, v0, v12
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v12
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_min_f32_e32 v1, v0, v13
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v13
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_min_f32_e32 v1, v0, v14
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v14
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
-; GFX9-NEXT:    v_min_f32_e32 v1, v0, v15
-; GFX9-NEXT:    v_cmp_o_f32_e32 vcc, v0, v15
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v17, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v9, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v10, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v11, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v12, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v13, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v14, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v15, vcc
+; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v15, v0, vcc
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fminimum_v16float:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f32_e32 v16, v0, v1
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v16, vcc_lo
-; GFX10-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_min_f32_e32 v1, v0, v3
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_min_f32_e32 v1, v0, v4
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_min_f32_e32 v1, v0, v5
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v5
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_min_f32_e32 v1, v0, v6
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v6
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_min_f32_e32 v1, v0, v7
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v7
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_min_f32_e32 v1, v0, v8
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v8
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_min_f32_e32 v1, v0, v9
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v9
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_min_f32_e32 v1, v0, v10
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v10
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_min_f32_e32 v1, v0, v11
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v11
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_min_f32_e32 v1, v0, v12
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v12
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_min_f32_e32 v1, v0, v13
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v13
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_min_f32_e32 v1, v0, v14
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v14
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT:    v_min_f32_e32 v1, v0, v15
-; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v15
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v6, v6
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v7, v7
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v8, v8
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v8, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v9, v9
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v9, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v10, v10
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v10, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v11, v11
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v11, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v12, v12
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v12, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v13, v13
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v13, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v14, v14
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v14, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v15, v15
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v15, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v15, v0, vcc_lo
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fminimum_v16float:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f32_e32 v16, v0, v1
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v16, vcc_lo
-; GFX11-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_min_f32_e32 v1, v0, v3
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_min_f32_e32 v1, v0, v4
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_min_f32_e32 v1, v0, v5
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v5
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_min_f32_e32 v1, v0, v6
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v6
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v3, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v4, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_min_f32_e32 v1, v0, v7
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v7
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v5, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v6, v6
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v6, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v7, v7
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_min_f32_e32 v1, v0, v8
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v8
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v7, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v8, v8
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v8, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v9, v9
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_min_f32_e32 v1, v0, v9
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v9
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v9, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v10, v10
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v10, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v11, v11
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_min_f32_e32 v1, v0, v10
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v10
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v11, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v12, v12
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v12, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v13, v13
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_min_f32_e32 v1, v0, v11
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v11
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v13, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v14, v14
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v14, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v15, v15
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_min_f32_e32 v1, v0, v12
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v12
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_min_f32_e32 v1, v0, v13
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v13
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_min_f32_e32 v1, v0, v14
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v14
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT:    v_min_f32_e32 v1, v0, v15
-; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v15
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v15, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v15, v0, vcc_lo
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v16float:
@@ -1975,51 +2740,62 @@ define double @test_vector_reduce_fminimum_v2double(<2 x double> %v) {
 ; GFX7-LABEL: test_vector_reduce_fminimum_v2double:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX7-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fminimum_v2double:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fminimum_v2double:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fminimum_v2double:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fminimum_v2double:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_vector_reduce_fminimum_v2double:
@@ -2040,74 +2816,99 @@ define double @test_vector_reduce_fminimum_v3double(<3 x double> %v) {
 ; GFX7-LABEL: test_vector_reduce_fminimum_v3double:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX7-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fminimum_v3double:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fminimum_v3double:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fminimum_v3double:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v7, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc_lo
-; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc_lo
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fminimum_v3double:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v7, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v5 :: v_dual_cndmask_b32 v0, v0, v4
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v5, v1 :: v_dual_cndmask_b32 v2, v4, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_vector_reduce_fminimum_v3double:
@@ -2130,96 +2931,135 @@ define double @test_vector_reduce_fminimum_v4double(<4 x double> %v) {
 ; GFX7-LABEL: test_vector_reduce_fminimum_v4double:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f64 v[8:9], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX7-NEXT:    v_mov_b32_e32 v10, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v10, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v6, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fminimum_v4double:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f64 v[8:9], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-NEXT:    v_mov_b32_e32 v10, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v10, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v6, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fminimum_v4double:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f64 v[8:9], v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v10, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v9, v10, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v6, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fminimum_v4double:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f64 v[8:9], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc_lo
-; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[6:7], v[6:7]
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v6, v0, vcc_lo
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fminimum_v4double:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f64 v[8:9], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v5 :: v_dual_cndmask_b32 v0, v0, v4
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v5, v1 :: v_dual_cndmask_b32 v2, v4, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[6:7], v[6:7]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v7 :: v_dual_cndmask_b32 v0, v0, v6
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v7, v1 :: v_dual_cndmask_b32 v2, v6, v0
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v4double:
@@ -2256,186 +3096,281 @@ define double @test_vector_reduce_fminimum_v8double(<8 x double> %v) {
 ; GFX7-LABEL: test_vector_reduce_fminimum_v8double:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f64 v[16:17], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX7-NEXT:    v_mov_b32_e32 v18, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v17, v18, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v6, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v9, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v8, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v11, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v13, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v12, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v15, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v14, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fminimum_v8double:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f64 v[16:17], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-NEXT:    v_mov_b32_e32 v18, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v17, v18, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v6, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v9, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v8, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v11, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v13, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v12, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v15, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v14, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fminimum_v8double:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f64 v[16:17], v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v18, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v17, v18, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v6, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v9, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v8, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v11, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v13, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v12, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v15, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v14, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fminimum_v8double:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f64 v[16:17], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc_lo
-; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[10:11]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[12:13]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[14:15]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[6:7], v[6:7]
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v6, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[8:9], v[8:9]
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v9, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v8, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[10:11], v[10:11]
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v11, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[12:13], v[12:13]
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v13, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v12, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[14:15], v[14:15]
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v15, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v14, v0, vcc_lo
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fminimum_v8double:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f64 v[16:17], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[10:11]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[12:13]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[14:15]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v5 :: v_dual_cndmask_b32 v0, v0, v4
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v5, v1 :: v_dual_cndmask_b32 v2, v4, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[6:7], v[6:7]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v7 :: v_dual_cndmask_b32 v0, v0, v6
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v7, v1 :: v_dual_cndmask_b32 v2, v6, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[8:9], v[8:9]
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v9 :: v_dual_cndmask_b32 v0, v0, v8
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v9, v1 :: v_dual_cndmask_b32 v2, v8, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[10:11], v[10:11]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v11 :: v_dual_cndmask_b32 v0, v0, v10
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v11, v1 :: v_dual_cndmask_b32 v2, v10, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[12:13], v[12:13]
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v13 :: v_dual_cndmask_b32 v0, v0, v12
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v13, v1 :: v_dual_cndmask_b32 v2, v12, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[14:15], v[14:15]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v15 :: v_dual_cndmask_b32 v0, v0, v14
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v15, v1 :: v_dual_cndmask_b32 v2, v14, v0
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v8double:
@@ -2483,376 +3418,583 @@ define double @test_vector_reduce_fminimum_v16double(<16 x double> %v) {
 ; GFX7-LABEL: test_vector_reduce_fminimum_v16double:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f64 v[31:32], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX7-NEXT:    v_mov_b32_e32 v33, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v32, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v31, 0, vcc
-; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[16:17]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[18:19]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[18:19]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[20:21]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[20:21]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[22:23]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[22:23]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[24:25]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[24:25]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[26:27]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[26:27]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[28:29]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[28:29]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v6, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v9, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v8, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v11, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v13, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v12, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v15, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v14, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[16:17]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v17, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v16, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[18:19]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v19, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v18, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v19, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v18, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[20:21]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v21, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v20, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v21, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v20, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[22:23]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v23, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v22, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v23, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v22, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[24:25]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v25, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v24, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v25, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v24, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[26:27]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v27, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v26, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v27, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v26, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[28:29]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v29, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v28, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v29, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v28, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[30:31]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[30:31]
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[30:31], v[30:31]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v30, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v31, vcc
+; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v31, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v30, v0, vcc
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fminimum_v16double:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f64 v[31:32], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-NEXT:    v_mov_b32_e32 v33, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v32, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v31, 0, vcc
-; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[16:17]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[18:19]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[18:19]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[20:21]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[20:21]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[22:23]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[22:23]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[24:25]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[24:25]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[26:27]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[26:27]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[28:29]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[28:29]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v6, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v9, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v8, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v11, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v13, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v12, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v15, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v14, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[16:17]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v17, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v16, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[18:19]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v19, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v18, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v19, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v18, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[20:21]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v21, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v20, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v21, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v20, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[22:23]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v23, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v22, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v23, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v22, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[24:25]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v25, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v24, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v25, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v24, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[26:27]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v27, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v26, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v27, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v26, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[28:29]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v29, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v28, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v29, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v28, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[30:31]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[30:31]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[30:31], v[30:31]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v30, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v31, vcc
+; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v31, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v30, v0, vcc
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fminimum_v16double:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    scratch_load_dword v31, off, s32
-; GFX9-NEXT:    v_min_f64 v[32:33], v[0:1], v[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v34, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v33, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[16:17]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[18:19]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[18:19]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[20:21]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[20:21]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[22:23]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[22:23]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[24:25]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[24:25]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[26:27]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[26:27]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[28:29]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[28:29]
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[4:5]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[6:7]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v6, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[8:9]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v9, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v8, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[10:11]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v11, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[12:13]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v13, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v12, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[14:15]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v15, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v14, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[16:17]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v17, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v16, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[18:19]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v19, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v18, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v19, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v18, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[20:21]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v21, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v20, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v21, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v20, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[22:23]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v23, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v22, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v23, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v22, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[24:25]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v25, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v24, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v25, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v24, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[26:27]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v27, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v26, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v27, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v26, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[28:29]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v29, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v28, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v29, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v28, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[30:31]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[30:31]
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[30:31], v[30:31]
+; GFX9-NEXT:    s_nop 1
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v30, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v31, vcc
+; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v31, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v30, v0, vcc
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fminimum_v16double:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f64 v[31:32], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v32, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v31, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
 ; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[10:11]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[12:13]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[14:15]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[16:17]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[18:19]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[18:19]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[20:21]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[20:21]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[22:23]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[22:23]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[24:25]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[24:25]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[26:27]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[26:27]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[28:29]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[28:29]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v5, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[6:7], v[6:7]
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v7, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v6, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[8:9], v[8:9]
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v9, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v8, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[10:11], v[10:11]
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v11, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[12:13], v[12:13]
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v13, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v12, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[14:15], v[14:15]
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v15, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v14, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[16:17], v[16:17]
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v17, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v16, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[18:19], v[18:19]
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v19, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v18, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v19, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v18, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[20:21], v[20:21]
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v21, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v20, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v21, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v20, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[22:23], v[22:23]
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v23, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v22, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v23, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v22, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[24:25], v[24:25]
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v25, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v24, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v25, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v24, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[26:27], v[26:27]
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v27, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v26, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v27, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v26, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[28:29], v[28:29]
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v29, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v28, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v29, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v28, v0, vcc_lo
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[30:31]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[30:31]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[30:31], v[30:31]
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v30, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v31, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v31, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v30, v0, vcc_lo
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fminimum_v16double:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f64 v[31:32], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v32, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v31, 0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
 ; GFX11-NEXT:    scratch_load_b32 v31, off, s32
-; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[10:11]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[12:13]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[14:15]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[16:17]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[18:19]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[18:19]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[20:21]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[20:21]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[22:23]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[22:23]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[24:25]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[24:25]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[26:27]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[26:27]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[28:29]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[28:29]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v5 :: v_dual_cndmask_b32 v0, v0, v4
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v5, v1 :: v_dual_cndmask_b32 v2, v4, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[6:7], v[6:7]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v7 :: v_dual_cndmask_b32 v0, v0, v6
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v7, v1 :: v_dual_cndmask_b32 v2, v6, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[8:9], v[8:9]
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v9 :: v_dual_cndmask_b32 v0, v0, v8
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v9, v1 :: v_dual_cndmask_b32 v2, v8, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[10:11], v[10:11]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v11 :: v_dual_cndmask_b32 v0, v0, v10
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v11, v1 :: v_dual_cndmask_b32 v2, v10, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[12:13], v[12:13]
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v13 :: v_dual_cndmask_b32 v0, v0, v12
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v13, v1 :: v_dual_cndmask_b32 v2, v12, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[14:15], v[14:15]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v15 :: v_dual_cndmask_b32 v0, v0, v14
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v15, v1 :: v_dual_cndmask_b32 v2, v14, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[16:17], v[16:17]
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v17 :: v_dual_cndmask_b32 v0, v0, v16
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v17, v1 :: v_dual_cndmask_b32 v2, v16, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[18:19], v[18:19]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v19 :: v_dual_cndmask_b32 v0, v0, v18
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v19, v1 :: v_dual_cndmask_b32 v2, v18, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[20:21], v[20:21]
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v21 :: v_dual_cndmask_b32 v0, v0, v20
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v21, v1 :: v_dual_cndmask_b32 v2, v20, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[22:23], v[22:23]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v23 :: v_dual_cndmask_b32 v0, v0, v22
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v23, v1 :: v_dual_cndmask_b32 v2, v22, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[24:25], v[24:25]
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v25 :: v_dual_cndmask_b32 v0, v0, v24
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v25, v1 :: v_dual_cndmask_b32 v2, v24, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[26:27], v[26:27]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v27 :: v_dual_cndmask_b32 v0, v0, v26
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v27, v1 :: v_dual_cndmask_b32 v2, v26, v0
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[28:29], v[28:29]
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v29 :: v_dual_cndmask_b32 v0, v0, v28
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v29, v1 :: v_dual_cndmask_b32 v2, v28, v0
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[30:31]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[30:31]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[30:31], v[30:31]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v30 :: v_dual_cndmask_b32 v1, v1, v31
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v3, v31, v1 :: v_dual_cndmask_b32 v2, v30, v0
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v16double:
diff --git a/llvm/test/CodeGen/Mips/fp-maximum-minimum.ll b/llvm/test/CodeGen/Mips/fp-maximum-minimum.ll
new file mode 100644
index 0000000000000..07a077f84d922
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/fp-maximum-minimum.ll
@@ -0,0 +1,976 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=mipsisa32r6 < %s | FileCheck %s --check-prefix=MIPS32R6
+; RUN: llc --mtriple=mips64 -mattr=+mips64r2 < %s | FileCheck %s --check-prefix=MIPS64R2
+; RUN: llc --mtriple=mips64 -mattr=+mips64 < %s | FileCheck %s --check-prefix=MIPS64
+; RUN: llc --mtriple=mips -mattr=+mips32r2 < %s | FileCheck %s --check-prefix=MIPS32R2
+; RUN: llc --mtriple=mips -mattr=+mips32 < %s | FileCheck %s --check-prefix=MIPS32
+
+declare float @llvm.maximum.f32(float, float)
+declare double @llvm.maximum.f64(double, double)
+declare float @llvm.minimum.f32(float, float)
+declare double @llvm.minimum.f64(double, double)
+
+define float @maximum_float(float %x, float %y) {
+; MIPS32R6-LABEL: maximum_float:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    cmp.un.s $f0, $f14, $f14
+; MIPS32R6-NEXT:    sel.s $f0, $f12, $f14
+; MIPS32R6-NEXT:    cmp.un.s $f1, $f0, $f0
+; MIPS32R6-NEXT:    sel.s $f1, $f14, $f0
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    max.s $f0, $f0, $f1
+;
+; MIPS64R2-LABEL: maximum_float:
+; MIPS64R2:       # %bb.0:
+; MIPS64R2-NEXT:    mov.s $f0, $f13
+; MIPS64R2-NEXT:    c.un.s $f13, $f13
+; MIPS64R2-NEXT:    movt.s $f12, $f13, $fcc0
+; MIPS64R2-NEXT:    c.ole.s $f12, $f13
+; MIPS64R2-NEXT:    movf.s $f0, $f12, $fcc0
+; MIPS64R2-NEXT:    mfc1 $1, $f12
+; MIPS64R2-NEXT:    slti $1, $1, 0
+; MIPS64R2-NEXT:    mov.s $f1, $f0
+; MIPS64R2-NEXT:    movz.s $f1, $f12, $1
+; MIPS64R2-NEXT:    mtc1 $zero, $f2
+; MIPS64R2-NEXT:    c.eq.s $f0, $f2
+; MIPS64R2-NEXT:    jr $ra
+; MIPS64R2-NEXT:    movt.s $f0, $f1, $fcc0
+;
+; MIPS64-LABEL: maximum_float:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    mov.s $f0, $f13
+; MIPS64-NEXT:    c.un.s $f13, $f13
+; MIPS64-NEXT:    movt.s $f12, $f13, $fcc0
+; MIPS64-NEXT:    c.ole.s $f12, $f13
+; MIPS64-NEXT:    movf.s $f0, $f12, $fcc0
+; MIPS64-NEXT:    mfc1 $1, $f12
+; MIPS64-NEXT:    slti $1, $1, 0
+; MIPS64-NEXT:    mov.s $f1, $f0
+; MIPS64-NEXT:    movz.s $f1, $f12, $1
+; MIPS64-NEXT:    mtc1 $zero, $f2
+; MIPS64-NEXT:    c.eq.s $f0, $f2
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    movt.s $f0, $f1, $fcc0
+;
+; MIPS32R2-LABEL: maximum_float:
+; MIPS32R2:       # %bb.0:
+; MIPS32R2-NEXT:    mov.s $f0, $f14
+; MIPS32R2-NEXT:    c.un.s $f14, $f14
+; MIPS32R2-NEXT:    movt.s $f12, $f14, $fcc0
+; MIPS32R2-NEXT:    c.ole.s $f12, $f14
+; MIPS32R2-NEXT:    movf.s $f0, $f12, $fcc0
+; MIPS32R2-NEXT:    mfc1 $1, $f12
+; MIPS32R2-NEXT:    slti $1, $1, 0
+; MIPS32R2-NEXT:    mov.s $f1, $f0
+; MIPS32R2-NEXT:    movz.s $f1, $f12, $1
+; MIPS32R2-NEXT:    mtc1 $zero, $f2
+; MIPS32R2-NEXT:    c.eq.s $f0, $f2
+; MIPS32R2-NEXT:    jr $ra
+; MIPS32R2-NEXT:    movt.s $f0, $f1, $fcc0
+;
+; MIPS32-LABEL: maximum_float:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    mov.s $f0, $f14
+; MIPS32-NEXT:    c.un.s $f14, $f14
+; MIPS32-NEXT:    movt.s $f12, $f14, $fcc0
+; MIPS32-NEXT:    c.ole.s $f12, $f14
+; MIPS32-NEXT:    movf.s $f0, $f12, $fcc0
+; MIPS32-NEXT:    mfc1 $1, $f12
+; MIPS32-NEXT:    slti $1, $1, 0
+; MIPS32-NEXT:    mov.s $f1, $f0
+; MIPS32-NEXT:    movz.s $f1, $f12, $1
+; MIPS32-NEXT:    mtc1 $zero, $f2
+; MIPS32-NEXT:    c.eq.s $f0, $f2
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    movt.s $f0, $f1, $fcc0
+; MIPS32R5-LABEL: maximum_float:
+; MIPS32R5:       # %bb.0:
+; MIPS32R5-NEXT:    mov.s $f0, $f14
+; MIPS32R5-NEXT:    c.un.s $f12, $f12
+; MIPS32R5-NEXT:    movt.s $f12, $f14, $fcc0
+; MIPS32R5-NEXT:    c.un.s $f14, $f14
+; MIPS32R5-NEXT:    movt.s $f0, $f12, $fcc0
+; MIPS32R5-NEXT:    c.ule.s $f12, $f0
+; MIPS32R5-NEXT:    movf.s $f0, $f12, $fcc0
+; MIPS32R5-NEXT:    mfc1 $1, $f12
+; MIPS32R5-NEXT:    mov.s $f1, $f0
+; MIPS32R5-NEXT:    movz.s $f1, $f12, $1
+; MIPS32R5-NEXT:    mtc1 $zero, $f2
+; MIPS32R5-NEXT:    c.eq.s $f0, $f2
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    movt.s $f0, $f1, $fcc0
+  %z = call float @llvm.maximum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @maximum_float_nsz(float %x, float %y) {
+; MIPS32R6-LABEL: maximum_float_nsz:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    cmp.un.s $f0, $f14, $f14
+; MIPS32R6-NEXT:    sel.s $f0, $f12, $f14
+; MIPS32R6-NEXT:    cmp.un.s $f1, $f0, $f0
+; MIPS32R6-NEXT:    sel.s $f1, $f14, $f0
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    max.s $f0, $f0, $f1
+;
+; MIPS64R2-LABEL: maximum_float_nsz:
+; MIPS64R2:       # %bb.0:
+; MIPS64R2-NEXT:    mov.s $f0, $f13
+; MIPS64R2-NEXT:    c.un.s $f13, $f13
+; MIPS64R2-NEXT:    movt.s $f12, $f13, $fcc0
+; MIPS64R2-NEXT:    c.ole.s $f12, $f13
+; MIPS64R2-NEXT:    jr $ra
+; MIPS64R2-NEXT:    movf.s $f0, $f12, $fcc0
+;
+; MIPS64-LABEL: maximum_float_nsz:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    mov.s $f0, $f13
+; MIPS64-NEXT:    c.un.s $f13, $f13
+; MIPS64-NEXT:    movt.s $f12, $f13, $fcc0
+; MIPS64-NEXT:    c.ole.s $f12, $f13
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    movf.s $f0, $f12, $fcc0
+;
+; MIPS32R2-LABEL: maximum_float_nsz:
+; MIPS32R2:       # %bb.0:
+; MIPS32R2-NEXT:    mov.s $f0, $f14
+; MIPS32R2-NEXT:    c.un.s $f14, $f14
+; MIPS32R2-NEXT:    movt.s $f12, $f14, $fcc0
+; MIPS32R2-NEXT:    c.ole.s $f12, $f14
+; MIPS32R2-NEXT:    jr $ra
+; MIPS32R2-NEXT:    movf.s $f0, $f12, $fcc0
+;
+; MIPS32-LABEL: maximum_float_nsz:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    mov.s $f0, $f14
+; MIPS32-NEXT:    c.un.s $f14, $f14
+; MIPS32-NEXT:    movt.s $f12, $f14, $fcc0
+; MIPS32-NEXT:    c.ole.s $f12, $f14
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    movf.s $f0, $f12, $fcc0
+; MIPS32R5-LABEL: maximum_float_nsz:
+; MIPS32R5:       # %bb.0:
+; MIPS32R5-NEXT:    mov.s $f0, $f14
+; MIPS32R5-NEXT:    c.un.s $f12, $f12
+; MIPS32R5-NEXT:    movt.s $f12, $f14, $fcc0
+; MIPS32R5-NEXT:    c.un.s $f14, $f14
+; MIPS32R5-NEXT:    movt.s $f0, $f12, $fcc0
+; MIPS32R5-NEXT:    c.ule.s $f12, $f0
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    movf.s $f0, $f12, $fcc0
+  %z = call nsz float @llvm.maximum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @maximum_float_nnan(float %x, float %y) {
+; MIPS32R6-LABEL: maximum_float_nnan:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    max.s $f0, $f12, $f14
+;
+; MIPS64R2-LABEL: maximum_float_nnan:
+; MIPS64R2:       # %bb.0:
+; MIPS64R2-NEXT:    mov.s $f0, $f13
+; MIPS64R2-NEXT:    c.ole.s $f12, $f13
+; MIPS64R2-NEXT:    movf.s $f0, $f12, $fcc0
+; MIPS64R2-NEXT:    mfc1 $1, $f12
+; MIPS64R2-NEXT:    slti $1, $1, 0
+; MIPS64R2-NEXT:    mov.s $f1, $f0
+; MIPS64R2-NEXT:    movz.s $f1, $f12, $1
+; MIPS64R2-NEXT:    mtc1 $zero, $f2
+; MIPS64R2-NEXT:    c.eq.s $f0, $f2
+; MIPS64R2-NEXT:    jr $ra
+; MIPS64R2-NEXT:    movt.s $f0, $f1, $fcc0
+;
+; MIPS64-LABEL: maximum_float_nnan:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    mov.s $f0, $f13
+; MIPS64-NEXT:    c.ole.s $f12, $f13
+; MIPS64-NEXT:    movf.s $f0, $f12, $fcc0
+; MIPS64-NEXT:    mfc1 $1, $f12
+; MIPS64-NEXT:    slti $1, $1, 0
+; MIPS64-NEXT:    mov.s $f1, $f0
+; MIPS64-NEXT:    movz.s $f1, $f12, $1
+; MIPS64-NEXT:    mtc1 $zero, $f2
+; MIPS64-NEXT:    c.eq.s $f0, $f2
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    movt.s $f0, $f1, $fcc0
+;
+; MIPS32R2-LABEL: maximum_float_nnan:
+; MIPS32R2:       # %bb.0:
+; MIPS32R2-NEXT:    mov.s $f0, $f14
+; MIPS32R2-NEXT:    c.ole.s $f12, $f14
+; MIPS32R2-NEXT:    movf.s $f0, $f12, $fcc0
+; MIPS32R2-NEXT:    mfc1 $1, $f12
+; MIPS32R2-NEXT:    slti $1, $1, 0
+; MIPS32R2-NEXT:    mov.s $f1, $f0
+; MIPS32R2-NEXT:    movz.s $f1, $f12, $1
+; MIPS32R2-NEXT:    mtc1 $zero, $f2
+; MIPS32R2-NEXT:    c.eq.s $f0, $f2
+; MIPS32R2-NEXT:    jr $ra
+; MIPS32R2-NEXT:    movt.s $f0, $f1, $fcc0
+;
+; MIPS32-LABEL: maximum_float_nnan:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    mov.s $f0, $f14
+; MIPS32-NEXT:    c.ole.s $f12, $f14
+; MIPS32-NEXT:    movf.s $f0, $f12, $fcc0
+; MIPS32-NEXT:    mfc1 $1, $f12
+; MIPS32-NEXT:    slti $1, $1, 0
+; MIPS32-NEXT:    mov.s $f1, $f0
+; MIPS32-NEXT:    movz.s $f1, $f12, $1
+; MIPS32-NEXT:    mtc1 $zero, $f2
+; MIPS32-NEXT:    c.eq.s $f0, $f2
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    movt.s $f0, $f1, $fcc0
+; MIPS32R5-LABEL: maximum_float_nnan:
+; MIPS32R5:       # %bb.0:
+; MIPS32R5-NEXT:    mov.s $f0, $f14
+; MIPS32R5-NEXT:    c.ule.s $f12, $f14
+; MIPS32R5-NEXT:    movf.s $f0, $f12, $fcc0
+; MIPS32R5-NEXT:    mfc1 $1, $f12
+; MIPS32R5-NEXT:    mov.s $f1, $f0
+; MIPS32R5-NEXT:    movz.s $f1, $f12, $1
+; MIPS32R5-NEXT:    mtc1 $zero, $f2
+; MIPS32R5-NEXT:    c.eq.s $f0, $f2
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    movt.s $f0, $f1, $fcc0
+  %z = call nnan float @llvm.maximum.f32(float %x, float %y)
+  ret float %z
+}
+
+
+define double @maximum_double(double %x, double %y) {
+; MIPS32R6-LABEL: maximum_double:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    cmp.un.d $f0, $f14, $f14
+; MIPS32R6-NEXT:    mfc1 $1, $f0
+; MIPS32R6-NEXT:    mtc1 $1, $f0
+; MIPS32R6-NEXT:    sel.d $f0, $f12, $f14
+; MIPS32R6-NEXT:    cmp.un.d $f1, $f0, $f0
+; MIPS32R6-NEXT:    mfc1 $1, $f1
+; MIPS32R6-NEXT:    mtc1 $1, $f1
+; MIPS32R6-NEXT:    sel.d $f1, $f14, $f0
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    max.d $f0, $f0, $f1
+;
+; MIPS64R2-LABEL: maximum_double:
+; MIPS64R2:       # %bb.0:
+; MIPS64R2-NEXT:    mov.d $f0, $f13
+; MIPS64R2-NEXT:    c.un.d $f13, $f13
+; MIPS64R2-NEXT:    movt.d $f12, $f13, $fcc0
+; MIPS64R2-NEXT:    c.ole.d $f12, $f13
+; MIPS64R2-NEXT:    movf.d $f0, $f12, $fcc0
+; MIPS64R2-NEXT:    dmfc1 $1, $f12
+; MIPS64R2-NEXT:    slti $1, $1, 0
+; MIPS64R2-NEXT:    mov.d $f1, $f0
+; MIPS64R2-NEXT:    movz.d $f1, $f12, $1
+; MIPS64R2-NEXT:    dmtc1 $zero, $f2
+; MIPS64R2-NEXT:    c.eq.d $f0, $f2
+; MIPS64R2-NEXT:    jr $ra
+; MIPS64R2-NEXT:    movt.d $f0, $f1, $fcc0
+;
+; MIPS64-LABEL: maximum_double:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    mov.d $f0, $f13
+; MIPS64-NEXT:    c.un.d $f13, $f13
+; MIPS64-NEXT:    movt.d $f12, $f13, $fcc0
+; MIPS64-NEXT:    c.ole.d $f12, $f13
+; MIPS64-NEXT:    movf.d $f0, $f12, $fcc0
+; MIPS64-NEXT:    dmfc1 $1, $f12
+; MIPS64-NEXT:    slti $1, $1, 0
+; MIPS64-NEXT:    mov.d $f1, $f0
+; MIPS64-NEXT:    movz.d $f1, $f12, $1
+; MIPS64-NEXT:    dmtc1 $zero, $f2
+; MIPS64-NEXT:    c.eq.d $f0, $f2
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    movt.d $f0, $f1, $fcc0
+;
+; MIPS32R2-LABEL: maximum_double:
+; MIPS32R2:       # %bb.0:
+; MIPS32R2-NEXT:    mov.d $f0, $f14
+; MIPS32R2-NEXT:    c.un.d $f14, $f14
+; MIPS32R2-NEXT:    movt.d $f12, $f14, $fcc0
+; MIPS32R2-NEXT:    c.ole.d $f12, $f14
+; MIPS32R2-NEXT:    movf.d $f0, $f12, $fcc0
+; MIPS32R2-NEXT:    cvt.s.d $f2, $f12
+; MIPS32R2-NEXT:    mfc1 $1, $f2
+; MIPS32R2-NEXT:    slti $1, $1, 0
+; MIPS32R2-NEXT:    mov.d $f2, $f0
+; MIPS32R2-NEXT:    movz.d $f2, $f12, $1
+; MIPS32R2-NEXT:    mtc1 $zero, $f4
+; MIPS32R2-NEXT:    mthc1 $zero, $f4
+; MIPS32R2-NEXT:    c.eq.d $f0, $f4
+; MIPS32R2-NEXT:    jr $ra
+; MIPS32R2-NEXT:    movt.d $f0, $f2, $fcc0
+;
+; MIPS32-LABEL: maximum_double:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    mov.d $f0, $f14
+; MIPS32-NEXT:    c.un.d $f14, $f14
+; MIPS32-NEXT:    movt.d $f12, $f14, $fcc0
+; MIPS32-NEXT:    c.ole.d $f12, $f14
+; MIPS32-NEXT:    movf.d $f0, $f12, $fcc0
+; MIPS32-NEXT:    cvt.s.d $f2, $f12
+; MIPS32-NEXT:    mfc1 $1, $f2
+; MIPS32-NEXT:    slti $1, $1, 0
+; MIPS32-NEXT:    mov.d $f2, $f0
+; MIPS32-NEXT:    movz.d $f2, $f12, $1
+; MIPS32-NEXT:    mtc1 $zero, $f4
+; MIPS32-NEXT:    mtc1 $zero, $f5
+; MIPS32-NEXT:    c.eq.d $f0, $f4
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    movt.d $f0, $f2, $fcc0
+; MIPS32R5-LABEL: maximum_double:
+; MIPS32R5:       # %bb.0:
+; MIPS32R5-NEXT:    mov.d $f0, $f14
+; MIPS32R5-NEXT:    c.un.d $f12, $f12
+; MIPS32R5-NEXT:    movt.d $f12, $f14, $fcc0
+; MIPS32R5-NEXT:    c.un.d $f14, $f14
+; MIPS32R5-NEXT:    movt.d $f0, $f12, $fcc0
+; MIPS32R5-NEXT:    c.ule.d $f12, $f0
+; MIPS32R5-NEXT:    movf.d $f0, $f12, $fcc0
+; MIPS32R5-NEXT:    cvt.s.d $f1, $f12
+; MIPS32R5-NEXT:    mfc1 $1, $f1
+; MIPS32R5-NEXT:    mov.d $f1, $f0
+; MIPS32R5-NEXT:    movz.d $f1, $f12, $1
+; MIPS32R5-NEXT:    mtc1 $zero, $f2
+; MIPS32R5-NEXT:    mthc1 $zero, $f2
+; MIPS32R5-NEXT:    c.eq.d $f0, $f2
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    movt.d $f0, $f1, $fcc0
+  %z = call double @llvm.maximum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @maximum_double_nsz(double %x, double %y) {
+; MIPS32R6-LABEL: maximum_double_nsz:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    cmp.un.d $f0, $f14, $f14
+; MIPS32R6-NEXT:    mfc1 $1, $f0
+; MIPS32R6-NEXT:    mtc1 $1, $f0
+; MIPS32R6-NEXT:    sel.d $f0, $f12, $f14
+; MIPS32R6-NEXT:    cmp.un.d $f1, $f0, $f0
+; MIPS32R6-NEXT:    mfc1 $1, $f1
+; MIPS32R6-NEXT:    mtc1 $1, $f1
+; MIPS32R6-NEXT:    sel.d $f1, $f14, $f0
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    max.d $f0, $f0, $f1
+;
+; MIPS64R2-LABEL: maximum_double_nsz:
+; MIPS64R2:       # %bb.0:
+; MIPS64R2-NEXT:    mov.d $f0, $f13
+; MIPS64R2-NEXT:    c.un.d $f13, $f13
+; MIPS64R2-NEXT:    movt.d $f12, $f13, $fcc0
+; MIPS64R2-NEXT:    c.ole.d $f12, $f13
+; MIPS64R2-NEXT:    jr $ra
+; MIPS64R2-NEXT:    movf.d $f0, $f12, $fcc0
+;
+; MIPS64-LABEL: maximum_double_nsz:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    mov.d $f0, $f13
+; MIPS64-NEXT:    c.un.d $f13, $f13
+; MIPS64-NEXT:    movt.d $f12, $f13, $fcc0
+; MIPS64-NEXT:    c.ole.d $f12, $f13
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    movf.d $f0, $f12, $fcc0
+;
+; MIPS32R2-LABEL: maximum_double_nsz:
+; MIPS32R2:       # %bb.0:
+; MIPS32R2-NEXT:    mov.d $f0, $f14
+; MIPS32R2-NEXT:    c.un.d $f14, $f14
+; MIPS32R2-NEXT:    movt.d $f12, $f14, $fcc0
+; MIPS32R2-NEXT:    c.ole.d $f12, $f14
+; MIPS32R2-NEXT:    jr $ra
+; MIPS32R2-NEXT:    movf.d $f0, $f12, $fcc0
+;
+; MIPS32-LABEL: maximum_double_nsz:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    mov.d $f0, $f14
+; MIPS32-NEXT:    c.un.d $f14, $f14
+; MIPS32-NEXT:    movt.d $f12, $f14, $fcc0
+; MIPS32-NEXT:    c.ole.d $f12, $f14
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    movf.d $f0, $f12, $fcc0
+; MIPS32R5-LABEL: maximum_double_nsz:
+; MIPS32R5:       # %bb.0:
+; MIPS32R5-NEXT:    mov.d $f0, $f14
+; MIPS32R5-NEXT:    c.un.d $f12, $f12
+; MIPS32R5-NEXT:    movt.d $f12, $f14, $fcc0
+; MIPS32R5-NEXT:    c.un.d $f14, $f14
+; MIPS32R5-NEXT:    movt.d $f0, $f12, $fcc0
+; MIPS32R5-NEXT:    c.ule.d $f12, $f0
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    movf.d $f0, $f12, $fcc0
+  %z = call nsz double @llvm.maximum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @maximum_double_nnan(double %x, double %y) {
+; MIPS32R6-LABEL: maximum_double_nnan:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    max.d $f0, $f12, $f14
+;
+; MIPS64R2-LABEL: maximum_double_nnan:
+; MIPS64R2:       # %bb.0:
+; MIPS64R2-NEXT:    mov.d $f0, $f13
+; MIPS64R2-NEXT:    c.ole.d $f12, $f13
+; MIPS64R2-NEXT:    movf.d $f0, $f12, $fcc0
+; MIPS64R2-NEXT:    dmfc1 $1, $f12
+; MIPS64R2-NEXT:    slti $1, $1, 0
+; MIPS64R2-NEXT:    mov.d $f1, $f0
+; MIPS64R2-NEXT:    movz.d $f1, $f12, $1
+; MIPS64R2-NEXT:    dmtc1 $zero, $f2
+; MIPS64R2-NEXT:    c.eq.d $f0, $f2
+; MIPS64R2-NEXT:    jr $ra
+; MIPS64R2-NEXT:    movt.d $f0, $f1, $fcc0
+;
+; MIPS64-LABEL: maximum_double_nnan:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    mov.d $f0, $f13
+; MIPS64-NEXT:    c.ole.d $f12, $f13
+; MIPS64-NEXT:    movf.d $f0, $f12, $fcc0
+; MIPS64-NEXT:    dmfc1 $1, $f12
+; MIPS64-NEXT:    slti $1, $1, 0
+; MIPS64-NEXT:    mov.d $f1, $f0
+; MIPS64-NEXT:    movz.d $f1, $f12, $1
+; MIPS64-NEXT:    dmtc1 $zero, $f2
+; MIPS64-NEXT:    c.eq.d $f0, $f2
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    movt.d $f0, $f1, $fcc0
+;
+; MIPS32R2-LABEL: maximum_double_nnan:
+; MIPS32R2:       # %bb.0:
+; MIPS32R2-NEXT:    mov.d $f0, $f14
+; MIPS32R2-NEXT:    c.ole.d $f12, $f14
+; MIPS32R2-NEXT:    movf.d $f0, $f12, $fcc0
+; MIPS32R2-NEXT:    cvt.s.d $f2, $f12
+; MIPS32R2-NEXT:    mfc1 $1, $f2
+; MIPS32R2-NEXT:    slti $1, $1, 0
+; MIPS32R2-NEXT:    mov.d $f2, $f0
+; MIPS32R2-NEXT:    movz.d $f2, $f12, $1
+; MIPS32R2-NEXT:    mtc1 $zero, $f4
+; MIPS32R2-NEXT:    mthc1 $zero, $f4
+; MIPS32R2-NEXT:    c.eq.d $f0, $f4
+; MIPS32R2-NEXT:    jr $ra
+; MIPS32R2-NEXT:    movt.d $f0, $f2, $fcc0
+;
+; MIPS32-LABEL: maximum_double_nnan:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    mov.d $f0, $f14
+; MIPS32-NEXT:    c.ole.d $f12, $f14
+; MIPS32-NEXT:    movf.d $f0, $f12, $fcc0
+; MIPS32-NEXT:    cvt.s.d $f2, $f12
+; MIPS32-NEXT:    mfc1 $1, $f2
+; MIPS32-NEXT:    slti $1, $1, 0
+; MIPS32-NEXT:    mov.d $f2, $f0
+; MIPS32-NEXT:    movz.d $f2, $f12, $1
+; MIPS32-NEXT:    mtc1 $zero, $f4
+; MIPS32-NEXT:    mtc1 $zero, $f5
+; MIPS32-NEXT:    c.eq.d $f0, $f4
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    movt.d $f0, $f2, $fcc0
+; MIPS32R5-LABEL: maximum_double_nnan:
+; MIPS32R5:       # %bb.0:
+; MIPS32R5-NEXT:    mov.d $f0, $f14
+; MIPS32R5-NEXT:    c.ule.d $f12, $f14
+; MIPS32R5-NEXT:    movf.d $f0, $f12, $fcc0
+; MIPS32R5-NEXT:    cvt.s.d $f1, $f12
+; MIPS32R5-NEXT:    mfc1 $1, $f1
+; MIPS32R5-NEXT:    mov.d $f1, $f0
+; MIPS32R5-NEXT:    movz.d $f1, $f12, $1
+; MIPS32R5-NEXT:    mtc1 $zero, $f2
+; MIPS32R5-NEXT:    mthc1 $zero, $f2
+; MIPS32R5-NEXT:    c.eq.d $f0, $f2
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    movt.d $f0, $f1, $fcc0
+  %z = call nnan double @llvm.maximum.f64(double %x, double %y)
+  ret double %z
+}
+
+define float @minimum_float(float %x, float %y) {
+; MIPS32R6-LABEL: minimum_float:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    cmp.un.s $f0, $f14, $f14
+; MIPS32R6-NEXT:    sel.s $f0, $f12, $f14
+; MIPS32R6-NEXT:    cmp.un.s $f1, $f0, $f0
+; MIPS32R6-NEXT:    sel.s $f1, $f14, $f0
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    min.s $f0, $f0, $f1
+;
+; MIPS64R2-LABEL: minimum_float:
+; MIPS64R2:       # %bb.0:
+; MIPS64R2-NEXT:    mov.s $f0, $f13
+; MIPS64R2-NEXT:    c.un.s $f13, $f13
+; MIPS64R2-NEXT:    movt.s $f12, $f13, $fcc0
+; MIPS64R2-NEXT:    c.ult.s $f12, $f13
+; MIPS64R2-NEXT:    movt.s $f0, $f12, $fcc0
+; MIPS64R2-NEXT:    mfc1 $1, $f12
+; MIPS64R2-NEXT:    slti $1, $1, 0
+; MIPS64R2-NEXT:    mov.s $f1, $f0
+; MIPS64R2-NEXT:    movn.s $f1, $f12, $1
+; MIPS64R2-NEXT:    mtc1 $zero, $f2
+; MIPS64R2-NEXT:    c.eq.s $f0, $f2
+; MIPS64R2-NEXT:    jr $ra
+; MIPS64R2-NEXT:    movt.s $f0, $f1, $fcc0
+;
+; MIPS64-LABEL: minimum_float:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    mov.s $f0, $f13
+; MIPS64-NEXT:    c.un.s $f13, $f13
+; MIPS64-NEXT:    movt.s $f12, $f13, $fcc0
+; MIPS64-NEXT:    c.ult.s $f12, $f13
+; MIPS64-NEXT:    movt.s $f0, $f12, $fcc0
+; MIPS64-NEXT:    mfc1 $1, $f12
+; MIPS64-NEXT:    slti $1, $1, 0
+; MIPS64-NEXT:    mov.s $f1, $f0
+; MIPS64-NEXT:    movn.s $f1, $f12, $1
+; MIPS64-NEXT:    mtc1 $zero, $f2
+; MIPS64-NEXT:    c.eq.s $f0, $f2
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    movt.s $f0, $f1, $fcc0
+;
+; MIPS32R2-LABEL: minimum_float:
+; MIPS32R2:       # %bb.0:
+; MIPS32R2-NEXT:    mov.s $f0, $f14
+; MIPS32R2-NEXT:    c.un.s $f14, $f14
+; MIPS32R2-NEXT:    movt.s $f12, $f14, $fcc0
+; MIPS32R2-NEXT:    c.ult.s $f12, $f14
+; MIPS32R2-NEXT:    movt.s $f0, $f12, $fcc0
+; MIPS32R2-NEXT:    mfc1 $1, $f12
+; MIPS32R2-NEXT:    slti $1, $1, 0
+; MIPS32R2-NEXT:    mov.s $f1, $f0
+; MIPS32R2-NEXT:    movn.s $f1, $f12, $1
+; MIPS32R2-NEXT:    mtc1 $zero, $f2
+; MIPS32R2-NEXT:    c.eq.s $f0, $f2
+; MIPS32R2-NEXT:    jr $ra
+; MIPS32R2-NEXT:    movt.s $f0, $f1, $fcc0
+;
+; MIPS32-LABEL: minimum_float:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    mov.s $f0, $f14
+; MIPS32-NEXT:    c.un.s $f14, $f14
+; MIPS32-NEXT:    movt.s $f12, $f14, $fcc0
+; MIPS32-NEXT:    c.ult.s $f12, $f14
+; MIPS32-NEXT:    movt.s $f0, $f12, $fcc0
+; MIPS32-NEXT:    mfc1 $1, $f12
+; MIPS32-NEXT:    slti $1, $1, 0
+; MIPS32-NEXT:    mov.s $f1, $f0
+; MIPS32-NEXT:    movn.s $f1, $f12, $1
+; MIPS32-NEXT:    mtc1 $zero, $f2
+; MIPS32-NEXT:    c.eq.s $f0, $f2
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    movt.s $f0, $f1, $fcc0
+; MIPS32R5-LABEL: minimum_float:
+; MIPS32R5:       # %bb.0:
+; MIPS32R5-NEXT:    mov.s $f0, $f14
+; MIPS32R5-NEXT:    c.un.s $f12, $f12
+; MIPS32R5-NEXT:    movt.s $f12, $f14, $fcc0
+; MIPS32R5-NEXT:    c.un.s $f14, $f14
+; MIPS32R5-NEXT:    movt.s $f0, $f12, $fcc0
+; MIPS32R5-NEXT:    c.olt.s $f12, $f0
+; MIPS32R5-NEXT:    movt.s $f0, $f12, $fcc0
+; MIPS32R5-NEXT:    mfc1 $1, $f12
+; MIPS32R5-NEXT:    lui $2, 32768
+; MIPS32R5-NEXT:    xor $1, $1, $2
+; MIPS32R5-NEXT:    mov.s $f1, $f0
+; MIPS32R5-NEXT:    movz.s $f1, $f12, $1
+; MIPS32R5-NEXT:    mtc1 $zero, $f2
+; MIPS32R5-NEXT:    c.eq.s $f0, $f2
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    movt.s $f0, $f1, $fcc0
+  %z = call float @llvm.minimum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @minimum_float_nsz(float %x, float %y) {
+; MIPS32R6-LABEL: minimum_float_nsz:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    cmp.un.s $f0, $f14, $f14
+; MIPS32R6-NEXT:    sel.s $f0, $f12, $f14
+; MIPS32R6-NEXT:    cmp.un.s $f1, $f0, $f0
+; MIPS32R6-NEXT:    sel.s $f1, $f14, $f0
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    min.s $f0, $f0, $f1
+;
+; MIPS64R2-LABEL: minimum_float_nsz:
+; MIPS64R2:       # %bb.0:
+; MIPS64R2-NEXT:    mov.s $f0, $f13
+; MIPS64R2-NEXT:    c.un.s $f13, $f13
+; MIPS64R2-NEXT:    movt.s $f12, $f13, $fcc0
+; MIPS64R2-NEXT:    c.ult.s $f12, $f13
+; MIPS64R2-NEXT:    jr $ra
+; MIPS64R2-NEXT:    movt.s $f0, $f12, $fcc0
+;
+; MIPS64-LABEL: minimum_float_nsz:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    mov.s $f0, $f13
+; MIPS64-NEXT:    c.un.s $f13, $f13
+; MIPS64-NEXT:    movt.s $f12, $f13, $fcc0
+; MIPS64-NEXT:    c.ult.s $f12, $f13
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    movt.s $f0, $f12, $fcc0
+;
+; MIPS32R2-LABEL: minimum_float_nsz:
+; MIPS32R2:       # %bb.0:
+; MIPS32R2-NEXT:    mov.s $f0, $f14
+; MIPS32R2-NEXT:    c.un.s $f14, $f14
+; MIPS32R2-NEXT:    movt.s $f12, $f14, $fcc0
+; MIPS32R2-NEXT:    c.ult.s $f12, $f14
+; MIPS32R2-NEXT:    jr $ra
+; MIPS32R2-NEXT:    movt.s $f0, $f12, $fcc0
+;
+; MIPS32-LABEL: minimum_float_nsz:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    mov.s $f0, $f14
+; MIPS32-NEXT:    c.un.s $f14, $f14
+; MIPS32-NEXT:    movt.s $f12, $f14, $fcc0
+; MIPS32-NEXT:    c.ult.s $f12, $f14
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    movt.s $f0, $f12, $fcc0
+; MIPS32R5-LABEL: minimum_float_nsz:
+; MIPS32R5:       # %bb.0:
+; MIPS32R5-NEXT:    mov.s $f0, $f14
+; MIPS32R5-NEXT:    c.un.s $f12, $f12
+; MIPS32R5-NEXT:    movt.s $f12, $f14, $fcc0
+; MIPS32R5-NEXT:    c.un.s $f14, $f14
+; MIPS32R5-NEXT:    movt.s $f0, $f12, $fcc0
+; MIPS32R5-NEXT:    c.olt.s $f12, $f0
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    movt.s $f0, $f12, $fcc0
+  %z = call nsz float @llvm.minimum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @minimum_float_nnan(float %x, float %y) {
+; MIPS32R6-LABEL: minimum_float_nnan:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    min.s $f0, $f12, $f14
+;
+; MIPS64R2-LABEL: minimum_float_nnan:
+; MIPS64R2:       # %bb.0:
+; MIPS64R2-NEXT:    mov.s $f0, $f13
+; MIPS64R2-NEXT:    c.ult.s $f12, $f13
+; MIPS64R2-NEXT:    movt.s $f0, $f12, $fcc0
+; MIPS64R2-NEXT:    mfc1 $1, $f12
+; MIPS64R2-NEXT:    slti $1, $1, 0
+; MIPS64R2-NEXT:    mov.s $f1, $f0
+; MIPS64R2-NEXT:    movn.s $f1, $f12, $1
+; MIPS64R2-NEXT:    mtc1 $zero, $f2
+; MIPS64R2-NEXT:    c.eq.s $f0, $f2
+; MIPS64R2-NEXT:    jr $ra
+; MIPS64R2-NEXT:    movt.s $f0, $f1, $fcc0
+;
+; MIPS64-LABEL: minimum_float_nnan:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    mov.s $f0, $f13
+; MIPS64-NEXT:    c.ult.s $f12, $f13
+; MIPS64-NEXT:    movt.s $f0, $f12, $fcc0
+; MIPS64-NEXT:    mfc1 $1, $f12
+; MIPS64-NEXT:    slti $1, $1, 0
+; MIPS64-NEXT:    mov.s $f1, $f0
+; MIPS64-NEXT:    movn.s $f1, $f12, $1
+; MIPS64-NEXT:    mtc1 $zero, $f2
+; MIPS64-NEXT:    c.eq.s $f0, $f2
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    movt.s $f0, $f1, $fcc0
+;
+; MIPS32R2-LABEL: minimum_float_nnan:
+; MIPS32R2:       # %bb.0:
+; MIPS32R2-NEXT:    mov.s $f0, $f14
+; MIPS32R2-NEXT:    c.ult.s $f12, $f14
+; MIPS32R2-NEXT:    movt.s $f0, $f12, $fcc0
+; MIPS32R2-NEXT:    mfc1 $1, $f12
+; MIPS32R2-NEXT:    slti $1, $1, 0
+; MIPS32R2-NEXT:    mov.s $f1, $f0
+; MIPS32R2-NEXT:    movn.s $f1, $f12, $1
+; MIPS32R2-NEXT:    mtc1 $zero, $f2
+; MIPS32R2-NEXT:    c.eq.s $f0, $f2
+; MIPS32R2-NEXT:    jr $ra
+; MIPS32R2-NEXT:    movt.s $f0, $f1, $fcc0
+;
+; MIPS32-LABEL: minimum_float_nnan:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    mov.s $f0, $f14
+; MIPS32-NEXT:    c.ult.s $f12, $f14
+; MIPS32-NEXT:    movt.s $f0, $f12, $fcc0
+; MIPS32-NEXT:    mfc1 $1, $f12
+; MIPS32-NEXT:    slti $1, $1, 0
+; MIPS32-NEXT:    mov.s $f1, $f0
+; MIPS32-NEXT:    movn.s $f1, $f12, $1
+; MIPS32-NEXT:    mtc1 $zero, $f2
+; MIPS32-NEXT:    c.eq.s $f0, $f2
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    movt.s $f0, $f1, $fcc0
+; MIPS32R5-LABEL: minimum_float_nnan:
+; MIPS32R5:       # %bb.0:
+; MIPS32R5-NEXT:    mov.s $f0, $f14
+; MIPS32R5-NEXT:    c.olt.s $f12, $f14
+; MIPS32R5-NEXT:    movt.s $f0, $f12, $fcc0
+; MIPS32R5-NEXT:    mfc1 $1, $f12
+; MIPS32R5-NEXT:    lui $2, 32768
+; MIPS32R5-NEXT:    xor $1, $1, $2
+; MIPS32R5-NEXT:    mov.s $f1, $f0
+; MIPS32R5-NEXT:    movz.s $f1, $f12, $1
+; MIPS32R5-NEXT:    mtc1 $zero, $f2
+; MIPS32R5-NEXT:    c.eq.s $f0, $f2
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    movt.s $f0, $f1, $fcc0
+  %z = call nnan float @llvm.minimum.f32(float %x, float %y)
+  ret float %z
+}
+
+define double @minimum_double(double %x, double %y) {
+; MIPS32R6-LABEL: minimum_double:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    cmp.un.d $f0, $f14, $f14
+; MIPS32R6-NEXT:    mfc1 $1, $f0
+; MIPS32R6-NEXT:    mtc1 $1, $f0
+; MIPS32R6-NEXT:    sel.d $f0, $f12, $f14
+; MIPS32R6-NEXT:    cmp.un.d $f1, $f0, $f0
+; MIPS32R6-NEXT:    mfc1 $1, $f1
+; MIPS32R6-NEXT:    mtc1 $1, $f1
+; MIPS32R6-NEXT:    sel.d $f1, $f14, $f0
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    min.d $f0, $f0, $f1
+;
+; MIPS64R2-LABEL: minimum_double:
+; MIPS64R2:       # %bb.0:
+; MIPS64R2-NEXT:    mov.d $f0, $f13
+; MIPS64R2-NEXT:    c.un.d $f13, $f13
+; MIPS64R2-NEXT:    movt.d $f12, $f13, $fcc0
+; MIPS64R2-NEXT:    c.ult.d $f12, $f13
+; MIPS64R2-NEXT:    movt.d $f0, $f12, $fcc0
+; MIPS64R2-NEXT:    dmfc1 $1, $f12
+; MIPS64R2-NEXT:    slti $1, $1, 0
+; MIPS64R2-NEXT:    mov.d $f1, $f0
+; MIPS64R2-NEXT:    movn.d $f1, $f12, $1
+; MIPS64R2-NEXT:    dmtc1 $zero, $f2
+; MIPS64R2-NEXT:    c.eq.d $f0, $f2
+; MIPS64R2-NEXT:    jr $ra
+; MIPS64R2-NEXT:    movt.d $f0, $f1, $fcc0
+;
+; MIPS64-LABEL: minimum_double:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    mov.d $f0, $f13
+; MIPS64-NEXT:    c.un.d $f13, $f13
+; MIPS64-NEXT:    movt.d $f12, $f13, $fcc0
+; MIPS64-NEXT:    c.ult.d $f12, $f13
+; MIPS64-NEXT:    movt.d $f0, $f12, $fcc0
+; MIPS64-NEXT:    dmfc1 $1, $f12
+; MIPS64-NEXT:    slti $1, $1, 0
+; MIPS64-NEXT:    mov.d $f1, $f0
+; MIPS64-NEXT:    movn.d $f1, $f12, $1
+; MIPS64-NEXT:    dmtc1 $zero, $f2
+; MIPS64-NEXT:    c.eq.d $f0, $f2
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    movt.d $f0, $f1, $fcc0
+;
+; MIPS32R2-LABEL: minimum_double:
+; MIPS32R2:       # %bb.0:
+; MIPS32R2-NEXT:    mov.d $f0, $f14
+; MIPS32R2-NEXT:    c.un.d $f14, $f14
+; MIPS32R2-NEXT:    movt.d $f12, $f14, $fcc0
+; MIPS32R2-NEXT:    c.ult.d $f12, $f14
+; MIPS32R2-NEXT:    movt.d $f0, $f12, $fcc0
+; MIPS32R2-NEXT:    cvt.s.d $f2, $f12
+; MIPS32R2-NEXT:    mfc1 $1, $f2
+; MIPS32R2-NEXT:    slti $1, $1, 0
+; MIPS32R2-NEXT:    mov.d $f2, $f0
+; MIPS32R2-NEXT:    movn.d $f2, $f12, $1
+; MIPS32R2-NEXT:    mtc1 $zero, $f4
+; MIPS32R2-NEXT:    mthc1 $zero, $f4
+; MIPS32R2-NEXT:    c.eq.d $f0, $f4
+; MIPS32R2-NEXT:    jr $ra
+; MIPS32R2-NEXT:    movt.d $f0, $f2, $fcc0
+;
+; MIPS32-LABEL: minimum_double:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    mov.d $f0, $f14
+; MIPS32-NEXT:    c.un.d $f14, $f14
+; MIPS32-NEXT:    movt.d $f12, $f14, $fcc0
+; MIPS32-NEXT:    c.ult.d $f12, $f14
+; MIPS32-NEXT:    movt.d $f0, $f12, $fcc0
+; MIPS32-NEXT:    cvt.s.d $f2, $f12
+; MIPS32-NEXT:    mfc1 $1, $f2
+; MIPS32-NEXT:    slti $1, $1, 0
+; MIPS32-NEXT:    mov.d $f2, $f0
+; MIPS32-NEXT:    movn.d $f2, $f12, $1
+; MIPS32-NEXT:    mtc1 $zero, $f4
+; MIPS32-NEXT:    mtc1 $zero, $f5
+; MIPS32-NEXT:    c.eq.d $f0, $f4
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    movt.d $f0, $f2, $fcc0
+; MIPS32R5-LABEL: minimum_double:
+; MIPS32R5:       # %bb.0:
+; MIPS32R5-NEXT:    mov.d $f0, $f14
+; MIPS32R5-NEXT:    c.un.d $f12, $f12
+; MIPS32R5-NEXT:    movt.d $f12, $f14, $fcc0
+; MIPS32R5-NEXT:    c.un.d $f14, $f14
+; MIPS32R5-NEXT:    movt.d $f0, $f12, $fcc0
+; MIPS32R5-NEXT:    c.olt.d $f12, $f0
+; MIPS32R5-NEXT:    movt.d $f0, $f12, $fcc0
+; MIPS32R5-NEXT:    cvt.s.d $f1, $f12
+; MIPS32R5-NEXT:    mfc1 $1, $f1
+; MIPS32R5-NEXT:    lui $2, 32768
+; MIPS32R5-NEXT:    xor $1, $1, $2
+; MIPS32R5-NEXT:    mov.d $f1, $f0
+; MIPS32R5-NEXT:    movz.d $f1, $f12, $1
+; MIPS32R5-NEXT:    mtc1 $zero, $f2
+; MIPS32R5-NEXT:    mthc1 $zero, $f2
+; MIPS32R5-NEXT:    c.eq.d $f0, $f2
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    movt.d $f0, $f1, $fcc0
+  %z = call double @llvm.minimum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @minimum_double_nsz(double %x, double %y) {
+; MIPS32R6-LABEL: minimum_double_nsz:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    cmp.un.d $f0, $f14, $f14
+; MIPS32R6-NEXT:    mfc1 $1, $f0
+; MIPS32R6-NEXT:    mtc1 $1, $f0
+; MIPS32R6-NEXT:    sel.d $f0, $f12, $f14
+; MIPS32R6-NEXT:    cmp.un.d $f1, $f0, $f0
+; MIPS32R6-NEXT:    mfc1 $1, $f1
+; MIPS32R6-NEXT:    mtc1 $1, $f1
+; MIPS32R6-NEXT:    sel.d $f1, $f14, $f0
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    min.d $f0, $f0, $f1
+;
+; MIPS64R2-LABEL: minimum_double_nsz:
+; MIPS64R2:       # %bb.0:
+; MIPS64R2-NEXT:    mov.d $f0, $f13
+; MIPS64R2-NEXT:    c.un.d $f13, $f13
+; MIPS64R2-NEXT:    movt.d $f12, $f13, $fcc0
+; MIPS64R2-NEXT:    c.ult.d $f12, $f13
+; MIPS64R2-NEXT:    jr $ra
+; MIPS64R2-NEXT:    movt.d $f0, $f12, $fcc0
+;
+; MIPS64-LABEL: minimum_double_nsz:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    mov.d $f0, $f13
+; MIPS64-NEXT:    c.un.d $f13, $f13
+; MIPS64-NEXT:    movt.d $f12, $f13, $fcc0
+; MIPS64-NEXT:    c.ult.d $f12, $f13
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    movt.d $f0, $f12, $fcc0
+;
+; MIPS32R2-LABEL: minimum_double_nsz:
+; MIPS32R2:       # %bb.0:
+; MIPS32R2-NEXT:    mov.d $f0, $f14
+; MIPS32R2-NEXT:    c.un.d $f14, $f14
+; MIPS32R2-NEXT:    movt.d $f12, $f14, $fcc0
+; MIPS32R2-NEXT:    c.ult.d $f12, $f14
+; MIPS32R2-NEXT:    jr $ra
+; MIPS32R2-NEXT:    movt.d $f0, $f12, $fcc0
+;
+; MIPS32-LABEL: minimum_double_nsz:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    mov.d $f0, $f14
+; MIPS32-NEXT:    c.un.d $f14, $f14
+; MIPS32-NEXT:    movt.d $f12, $f14, $fcc0
+; MIPS32-NEXT:    c.ult.d $f12, $f14
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    movt.d $f0, $f12, $fcc0
+; MIPS32R5-LABEL: minimum_double_nsz:
+; MIPS32R5:       # %bb.0:
+; MIPS32R5-NEXT:    mov.d $f0, $f14
+; MIPS32R5-NEXT:    c.un.d $f12, $f12
+; MIPS32R5-NEXT:    movt.d $f12, $f14, $fcc0
+; MIPS32R5-NEXT:    c.un.d $f14, $f14
+; MIPS32R5-NEXT:    movt.d $f0, $f12, $fcc0
+; MIPS32R5-NEXT:    c.olt.d $f12, $f0
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    movt.d $f0, $f12, $fcc0
+  %z = call nsz double @llvm.minimum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @minimum_double_nnan(double %x, double %y) {
+; MIPS32R6-LABEL: minimum_double_nnan:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    min.d $f0, $f12, $f14
+;
+; MIPS64R2-LABEL: minimum_double_nnan:
+; MIPS64R2:       # %bb.0:
+; MIPS64R2-NEXT:    mov.d $f0, $f13
+; MIPS64R2-NEXT:    c.ult.d $f12, $f13
+; MIPS64R2-NEXT:    movt.d $f0, $f12, $fcc0
+; MIPS64R2-NEXT:    dmfc1 $1, $f12
+; MIPS64R2-NEXT:    slti $1, $1, 0
+; MIPS64R2-NEXT:    mov.d $f1, $f0
+; MIPS64R2-NEXT:    movn.d $f1, $f12, $1
+; MIPS64R2-NEXT:    dmtc1 $zero, $f2
+; MIPS64R2-NEXT:    c.eq.d $f0, $f2
+; MIPS64R2-NEXT:    jr $ra
+; MIPS64R2-NEXT:    movt.d $f0, $f1, $fcc0
+;
+; MIPS64-LABEL: minimum_double_nnan:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    mov.d $f0, $f13
+; MIPS64-NEXT:    c.ult.d $f12, $f13
+; MIPS64-NEXT:    movt.d $f0, $f12, $fcc0
+; MIPS64-NEXT:    dmfc1 $1, $f12
+; MIPS64-NEXT:    slti $1, $1, 0
+; MIPS64-NEXT:    mov.d $f1, $f0
+; MIPS64-NEXT:    movn.d $f1, $f12, $1
+; MIPS64-NEXT:    dmtc1 $zero, $f2
+; MIPS64-NEXT:    c.eq.d $f0, $f2
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    movt.d $f0, $f1, $fcc0
+;
+; MIPS32R2-LABEL: minimum_double_nnan:
+; MIPS32R2:       # %bb.0:
+; MIPS32R2-NEXT:    mov.d $f0, $f14
+; MIPS32R2-NEXT:    c.ult.d $f12, $f14
+; MIPS32R2-NEXT:    movt.d $f0, $f12, $fcc0
+; MIPS32R2-NEXT:    cvt.s.d $f2, $f12
+; MIPS32R2-NEXT:    mfc1 $1, $f2
+; MIPS32R2-NEXT:    slti $1, $1, 0
+; MIPS32R2-NEXT:    mov.d $f2, $f0
+; MIPS32R2-NEXT:    movn.d $f2, $f12, $1
+; MIPS32R2-NEXT:    mtc1 $zero, $f4
+; MIPS32R2-NEXT:    mthc1 $zero, $f4
+; MIPS32R2-NEXT:    c.eq.d $f0, $f4
+; MIPS32R2-NEXT:    jr $ra
+; MIPS32R2-NEXT:    movt.d $f0, $f2, $fcc0
+;
+; MIPS32-LABEL: minimum_double_nnan:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    mov.d $f0, $f14
+; MIPS32-NEXT:    c.ult.d $f12, $f14
+; MIPS32-NEXT:    movt.d $f0, $f12, $fcc0
+; MIPS32-NEXT:    cvt.s.d $f2, $f12
+; MIPS32-NEXT:    mfc1 $1, $f2
+; MIPS32-NEXT:    slti $1, $1, 0
+; MIPS32-NEXT:    mov.d $f2, $f0
+; MIPS32-NEXT:    movn.d $f2, $f12, $1
+; MIPS32-NEXT:    mtc1 $zero, $f4
+; MIPS32-NEXT:    mtc1 $zero, $f5
+; MIPS32-NEXT:    c.eq.d $f0, $f4
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    movt.d $f0, $f2, $fcc0
+; MIPS32R5-LABEL: minimum_double_nnan:
+; MIPS32R5:       # %bb.0:
+; MIPS32R5-NEXT:    mov.d $f0, $f14
+; MIPS32R5-NEXT:    c.olt.d $f12, $f14
+; MIPS32R5-NEXT:    movt.d $f0, $f12, $fcc0
+; MIPS32R5-NEXT:    cvt.s.d $f1, $f12
+; MIPS32R5-NEXT:    mfc1 $1, $f1
+; MIPS32R5-NEXT:    lui $2, 32768
+; MIPS32R5-NEXT:    xor $1, $1, $2
+; MIPS32R5-NEXT:    mov.d $f1, $f0
+; MIPS32R5-NEXT:    movz.d $f1, $f12, $1
+; MIPS32R5-NEXT:    mtc1 $zero, $f2
+; MIPS32R5-NEXT:    mthc1 $zero, $f2
+; MIPS32R5-NEXT:    c.eq.d $f0, $f2
+; MIPS32R5-NEXT:    jr $ra
+; MIPS32R5-NEXT:    movt.d $f0, $f1, $fcc0
+  %z = call nnan double @llvm.minimum.f64(double %x, double %y)
+  ret double %z
+}
diff --git a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
index 41f77b5337e6d..979e47cd84f76 100644
--- a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
@@ -1558,8 +1558,8 @@ define bfloat @test_roundeven(bfloat %a) {
 define bfloat @test_maximum(bfloat %a, bfloat %b) {
 ; SM70-LABEL: test_maximum(
 ; SM70:       {
-; SM70-NEXT:    .reg .pred %p<6>;
-; SM70-NEXT:    .reg .b16 %rs<8>;
+; SM70-NEXT:    .reg .pred %p<5>;
+; SM70-NEXT:    .reg .b16 %rs<7>;
 ; SM70-NEXT:    .reg .b32 %r<7>;
 ; SM70-EMPTY:
 ; SM70-NEXT:  // %bb.0:
@@ -1567,21 +1567,19 @@ define bfloat @test_maximum(bfloat %a, bfloat %b) {
 ; SM70-NEXT:    ld.param.b16 %rs2, [test_maximum_param_1];
 ; SM70-NEXT:    cvt.u32.u16 %r1, %rs2;
 ; SM70-NEXT:    shl.b32 %r2, %r1, 16;
-; SM70-NEXT:    cvt.u32.u16 %r3, %rs1;
+; SM70-NEXT:    setp.nan.f32 %p1, %r2, %r2;
+; SM70-NEXT:    selp.b16 %rs3, %rs2, %rs1, %p1;
+; SM70-NEXT:    cvt.u32.u16 %r3, %rs3;
 ; SM70-NEXT:    shl.b32 %r4, %r3, 16;
-; SM70-NEXT:    setp.gt.f32 %p1, %r4, %r2;
-; SM70-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
-; SM70-NEXT:    setp.nan.f32 %p2, %r4, %r2;
-; SM70-NEXT:    selp.b16 %rs4, 0x7FC0, %rs3, %p2;
-; SM70-NEXT:    setp.eq.b16 %p3, %rs1, 0;
-; SM70-NEXT:    selp.b16 %rs5, %rs1, %rs4, %p3;
-; SM70-NEXT:    setp.eq.b16 %p4, %rs2, 0;
-; SM70-NEXT:    selp.b16 %rs6, %rs2, %rs5, %p4;
+; SM70-NEXT:    setp.gtu.f32 %p2, %r4, %r2;
+; SM70-NEXT:    selp.b16 %rs4, %rs3, %rs2, %p2;
+; SM70-NEXT:    setp.gt.s16 %p3, %rs3, -1;
+; SM70-NEXT:    selp.b16 %rs5, %rs3, %rs4, %p3;
 ; SM70-NEXT:    cvt.u32.u16 %r5, %rs4;
 ; SM70-NEXT:    shl.b32 %r6, %r5, 16;
-; SM70-NEXT:    setp.eq.f32 %p5, %r6, 0f00000000;
-; SM70-NEXT:    selp.b16 %rs7, %rs6, %rs4, %p5;
-; SM70-NEXT:    st.param.b16 [func_retval0], %rs7;
+; SM70-NEXT:    setp.eq.f32 %p4, %r6, 0f00000000;
+; SM70-NEXT:    selp.b16 %rs6, %rs5, %rs4, %p4;
+; SM70-NEXT:    st.param.b16 [func_retval0], %rs6;
 ; SM70-NEXT:    ret;
 ;
 ; SM80-LABEL: test_maximum(
@@ -1703,46 +1701,44 @@ define bfloat @test_maxnum(bfloat %a, bfloat %b) {
 define <2 x bfloat> @test_maximum_v2(<2 x bfloat> %a, <2 x bfloat> %b) {
 ; SM70-LABEL: test_maximum_v2(
 ; SM70:       {
-; SM70-NEXT:    .reg .pred %p<11>;
+; SM70-NEXT:    .reg .pred %p<9>;
 ; SM70-NEXT:    .reg .b16 %rs<15>;
-; SM70-NEXT:    .reg .b32 %r<13>;
+; SM70-NEXT:    .reg .b32 %r<14>;
 ; SM70-EMPTY:
 ; SM70-NEXT:  // %bb.0:
 ; SM70-NEXT:    ld.param.v2.b16 {%rs1, %rs2}, [test_maximum_v2_param_0];
 ; SM70-NEXT:    ld.param.v2.b16 {%rs3, %rs4}, [test_maximum_v2_param_1];
 ; SM70-NEXT:    cvt.u32.u16 %r1, %rs4;
 ; SM70-NEXT:    shl.b32 %r2, %r1, 16;
-; SM70-NEXT:    cvt.u32.u16 %r3, %rs2;
+; SM70-NEXT:    setp.nan.f32 %p1, %r2, %r2;
+; SM70-NEXT:    selp.b16 %rs5, %rs4, %rs2, %p1;
+; SM70-NEXT:    cvt.u32.u16 %r3, %rs5;
 ; SM70-NEXT:    shl.b32 %r4, %r3, 16;
-; SM70-NEXT:    setp.gt.f32 %p1, %r4, %r2;
-; SM70-NEXT:    selp.b16 %rs5, %rs2, %rs4, %p1;
-; SM70-NEXT:    setp.nan.f32 %p2, %r4, %r2;
-; SM70-NEXT:    selp.b16 %rs6, 0x7FC0, %rs5, %p2;
-; SM70-NEXT:    setp.eq.b16 %p3, %rs2, 0;
-; SM70-NEXT:    selp.b16 %rs7, %rs2, %rs6, %p3;
-; SM70-NEXT:    setp.eq.b16 %p4, %rs4, 0;
-; SM70-NEXT:    selp.b16 %rs8, %rs4, %rs7, %p4;
-; SM70-NEXT:    cvt.u32.u16 %r5, %rs6;
+; SM70-NEXT:    setp.gtu.f32 %p2, %r4, %r2;
+; SM70-NEXT:    selp.b16 %rs6, %rs5, %rs4, %p2;
+; SM70-NEXT:    cvt.u32.u16 %r5, %rs3;
 ; SM70-NEXT:    shl.b32 %r6, %r5, 16;
-; SM70-NEXT:    setp.eq.f32 %p5, %r6, 0f00000000;
-; SM70-NEXT:    selp.b16 %rs9, %rs8, %rs6, %p5;
-; SM70-NEXT:    cvt.u32.u16 %r7, %rs3;
-; SM70-NEXT:    shl.b32 %r8, %r7, 16;
-; SM70-NEXT:    cvt.u32.u16 %r9, %rs1;
-; SM70-NEXT:    shl.b32 %r10, %r9, 16;
-; SM70-NEXT:    setp.gt.f32 %p6, %r10, %r8;
-; SM70-NEXT:    selp.b16 %rs10, %rs1, %rs3, %p6;
-; SM70-NEXT:    setp.nan.f32 %p7, %r10, %r8;
-; SM70-NEXT:    selp.b16 %rs11, 0x7FC0, %rs10, %p7;
-; SM70-NEXT:    setp.eq.b16 %p8, %rs1, 0;
-; SM70-NEXT:    selp.b16 %rs12, %rs1, %rs11, %p8;
-; SM70-NEXT:    setp.eq.b16 %p9, %rs3, 0;
-; SM70-NEXT:    selp.b16 %rs13, %rs3, %rs12, %p9;
-; SM70-NEXT:    cvt.u32.u16 %r11, %rs11;
-; SM70-NEXT:    shl.b32 %r12, %r11, 16;
-; SM70-NEXT:    setp.eq.f32 %p10, %r12, 0f00000000;
-; SM70-NEXT:    selp.b16 %rs14, %rs13, %rs11, %p10;
-; SM70-NEXT:    st.param.v2.b16 [func_retval0], {%rs14, %rs9};
+; SM70-NEXT:    setp.nan.f32 %p3, %r6, %r6;
+; SM70-NEXT:    selp.b16 %rs7, %rs3, %rs1, %p3;
+; SM70-NEXT:    mov.b32 %r7, {%rs7, %rs5};
+; SM70-NEXT:    mov.b32 {%rs8, %rs9}, %r7;
+; SM70-NEXT:    setp.gt.s16 %p4, %rs9, -1;
+; SM70-NEXT:    selp.b16 %rs10, %rs5, %rs6, %p4;
+; SM70-NEXT:    cvt.u32.u16 %r8, %rs6;
+; SM70-NEXT:    shl.b32 %r9, %r8, 16;
+; SM70-NEXT:    setp.eq.f32 %p5, %r9, 0f00000000;
+; SM70-NEXT:    selp.b16 %rs11, %rs10, %rs6, %p5;
+; SM70-NEXT:    cvt.u32.u16 %r10, %rs7;
+; SM70-NEXT:    shl.b32 %r11, %r10, 16;
+; SM70-NEXT:    setp.gtu.f32 %p6, %r11, %r6;
+; SM70-NEXT:    selp.b16 %rs12, %rs7, %rs3, %p6;
+; SM70-NEXT:    setp.gt.s16 %p7, %rs8, -1;
+; SM70-NEXT:    selp.b16 %rs13, %rs7, %rs12, %p7;
+; SM70-NEXT:    cvt.u32.u16 %r12, %rs12;
+; SM70-NEXT:    shl.b32 %r13, %r12, 16;
+; SM70-NEXT:    setp.eq.f32 %p8, %r13, 0f00000000;
+; SM70-NEXT:    selp.b16 %rs14, %rs13, %rs12, %p8;
+; SM70-NEXT:    st.param.v2.b16 [func_retval0], {%rs14, %rs11};
 ; SM70-NEXT:    ret;
 ;
 ; SM80-LABEL: test_maximum_v2(
diff --git a/llvm/test/CodeGen/NVPTX/math-intrins.ll b/llvm/test/CodeGen/NVPTX/math-intrins.ll
index 6fb0112631af7..fb7e99f8cb01a 100644
--- a/llvm/test/CodeGen/NVPTX/math-intrins.ll
+++ b/llvm/test/CodeGen/NVPTX/math-intrins.ll
@@ -616,27 +616,25 @@ define <2 x half> @minnum_v2half(<2 x half> %a, <2 x half> %b) {
 define half @minimum_half(half %a, half %b) {
 ; CHECK-NOF16-LABEL: minimum_half(
 ; CHECK-NOF16:       {
-; CHECK-NOF16-NEXT:    .reg .pred %p<6>;
-; CHECK-NOF16-NEXT:    .reg .b16 %rs<8>;
+; CHECK-NOF16-NEXT:    .reg .pred %p<5>;
+; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
 ; CHECK-NOF16-NEXT:    .reg .b32 %r<4>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
 ; CHECK-NOF16-NEXT:    ld.param.b16 %rs1, [minimum_half_param_0];
 ; CHECK-NOF16-NEXT:    ld.param.b16 %rs2, [minimum_half_param_1];
 ; CHECK-NOF16-NEXT:    cvt.f32.f16 %r1, %rs2;
-; CHECK-NOF16-NEXT:    cvt.f32.f16 %r2, %rs1;
-; CHECK-NOF16-NEXT:    setp.lt.f32 %p1, %r2, %r1;
-; CHECK-NOF16-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
-; CHECK-NOF16-NEXT:    setp.nan.f32 %p2, %r2, %r1;
-; CHECK-NOF16-NEXT:    selp.b16 %rs4, 0x7E00, %rs3, %p2;
-; CHECK-NOF16-NEXT:    setp.eq.b16 %p3, %rs1, -32768;
-; CHECK-NOF16-NEXT:    selp.b16 %rs5, %rs1, %rs4, %p3;
-; CHECK-NOF16-NEXT:    setp.eq.b16 %p4, %rs2, -32768;
-; CHECK-NOF16-NEXT:    selp.b16 %rs6, %rs2, %rs5, %p4;
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
+; CHECK-NOF16-NEXT:    selp.b16 %rs3, %rs2, %rs1, %p1;
+; CHECK-NOF16-NEXT:    cvt.f32.f16 %r2, %rs3;
+; CHECK-NOF16-NEXT:    setp.ltu.f32 %p2, %r2, %r1;
+; CHECK-NOF16-NEXT:    selp.b16 %rs4, %rs3, %rs2, %p2;
+; CHECK-NOF16-NEXT:    setp.lt.s16 %p3, %rs3, 0;
+; CHECK-NOF16-NEXT:    selp.b16 %rs5, %rs3, %rs4, %p3;
 ; CHECK-NOF16-NEXT:    cvt.f32.f16 %r3, %rs4;
-; CHECK-NOF16-NEXT:    setp.eq.f32 %p5, %r3, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.b16 %rs7, %rs6, %rs4, %p5;
-; CHECK-NOF16-NEXT:    st.param.b16 [func_retval0], %rs7;
+; CHECK-NOF16-NEXT:    setp.eq.f32 %p4, %r3, 0f00000000;
+; CHECK-NOF16-NEXT:    selp.b16 %rs6, %rs5, %rs4, %p4;
+; CHECK-NOF16-NEXT:    st.param.b16 [func_retval0], %rs6;
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: minimum_half(
@@ -652,27 +650,25 @@ define half @minimum_half(half %a, half %b) {
 ;
 ; CHECK-SM80-NOF16-LABEL: minimum_half(
 ; CHECK-SM80-NOF16:       {
-; CHECK-SM80-NOF16-NEXT:    .reg .pred %p<6>;
-; CHECK-SM80-NOF16-NEXT:    .reg .b16 %rs<8>;
+; CHECK-SM80-NOF16-NEXT:    .reg .pred %p<5>;
+; CHECK-SM80-NOF16-NEXT:    .reg .b16 %rs<7>;
 ; CHECK-SM80-NOF16-NEXT:    .reg .b32 %r<4>;
 ; CHECK-SM80-NOF16-EMPTY:
 ; CHECK-SM80-NOF16-NEXT:  // %bb.0:
 ; CHECK-SM80-NOF16-NEXT:    ld.param.b16 %rs1, [minimum_half_param_0];
 ; CHECK-SM80-NOF16-NEXT:    ld.param.b16 %rs2, [minimum_half_param_1];
 ; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r1, %rs2;
-; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r2, %rs1;
-; CHECK-SM80-NOF16-NEXT:    setp.lt.f32 %p1, %r2, %r1;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
-; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p2, %r2, %r1;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs4, 0x7E00, %rs3, %p2;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p3, %rs1, -32768;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs5, %rs1, %rs4, %p3;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p4, %rs2, -32768;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs6, %rs2, %rs5, %p4;
+; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs3, %rs2, %rs1, %p1;
+; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r2, %rs3;
+; CHECK-SM80-NOF16-NEXT:    setp.ltu.f32 %p2, %r2, %r1;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs4, %rs3, %rs2, %p2;
+; CHECK-SM80-NOF16-NEXT:    setp.lt.s16 %p3, %rs3, 0;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs5, %rs3, %rs4, %p3;
 ; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r3, %rs4;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p5, %r3, 0f00000000;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs7, %rs6, %rs4, %p5;
-; CHECK-SM80-NOF16-NEXT:    st.param.b16 [func_retval0], %rs7;
+; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p4, %r3, 0f00000000;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs6, %rs5, %rs4, %p4;
+; CHECK-SM80-NOF16-NEXT:    st.param.b16 [func_retval0], %rs6;
 ; CHECK-SM80-NOF16-NEXT:    ret;
   %x = call half @llvm.minimum.f16(half %a, half %b)
   ret half %x
@@ -681,16 +677,18 @@ define half @minimum_half(half %a, half %b) {
 define float @minimum_float(float %a, float %b) {
 ; CHECK-NOF16-LABEL: minimum_float(
 ; CHECK-NOF16:       {
-; CHECK-NOF16-NEXT:    .reg .pred %p<2>;
-; CHECK-NOF16-NEXT:    .reg .b32 %r<5>;
+; CHECK-NOF16-NEXT:    .reg .pred %p<3>;
+; CHECK-NOF16-NEXT:    .reg .b32 %r<6>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
 ; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [minimum_float_param_0];
 ; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [minimum_float_param_1];
-; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r2;
-; CHECK-NOF16-NEXT:    min.f32 %r3, %r1, %r2;
-; CHECK-NOF16-NEXT:    selp.f32 %r4, 0f7FC00000, %r3, %p1;
-; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r2, %r2;
+; CHECK-NOF16-NEXT:    selp.f32 %r3, %r2, %r1, %p1;
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p2, %r3, %r3;
+; CHECK-NOF16-NEXT:    selp.f32 %r4, %r3, %r2, %p2;
+; CHECK-NOF16-NEXT:    min.f32 %r5, %r3, %r4;
+; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r5;
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: minimum_float(
@@ -727,8 +725,8 @@ define float @minimum_imm1(float %a) {
 ; CHECK-NOF16-NEXT:  // %bb.0:
 ; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [minimum_imm1_param_0];
 ; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
-; CHECK-NOF16-NEXT:    min.f32 %r2, %r1, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %r3, 0f7FC00000, %r2, %p1;
+; CHECK-NOF16-NEXT:    selp.f32 %r2, %r1, 0f00000000, %p1;
+; CHECK-NOF16-NEXT:    min.f32 %r3, %r1, %r2;
 ; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
 ; CHECK-NOF16-NEXT:    ret;
 ;
@@ -764,8 +762,8 @@ define float @minimum_imm2(float %a) {
 ; CHECK-NOF16-NEXT:  // %bb.0:
 ; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [minimum_imm2_param_0];
 ; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
-; CHECK-NOF16-NEXT:    min.f32 %r2, %r1, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %r3, 0f7FC00000, %r2, %p1;
+; CHECK-NOF16-NEXT:    selp.f32 %r2, %r1, 0f00000000, %p1;
+; CHECK-NOF16-NEXT:    min.f32 %r3, %r1, %r2;
 ; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
 ; CHECK-NOF16-NEXT:    ret;
 ;
@@ -795,16 +793,18 @@ define float @minimum_imm2(float %a) {
 define float @minimum_float_ftz(float %a, float %b) #1 {
 ; CHECK-NOF16-LABEL: minimum_float_ftz(
 ; CHECK-NOF16:       {
-; CHECK-NOF16-NEXT:    .reg .pred %p<2>;
-; CHECK-NOF16-NEXT:    .reg .b32 %r<5>;
+; CHECK-NOF16-NEXT:    .reg .pred %p<3>;
+; CHECK-NOF16-NEXT:    .reg .b32 %r<6>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
 ; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [minimum_float_ftz_param_0];
 ; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [minimum_float_ftz_param_1];
-; CHECK-NOF16-NEXT:    setp.nan.ftz.f32 %p1, %r1, %r2;
-; CHECK-NOF16-NEXT:    min.ftz.f32 %r3, %r1, %r2;
-; CHECK-NOF16-NEXT:    selp.f32 %r4, 0f7FC00000, %r3, %p1;
-; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NOF16-NEXT:    setp.nan.ftz.f32 %p1, %r2, %r2;
+; CHECK-NOF16-NEXT:    selp.f32 %r3, %r2, %r1, %p1;
+; CHECK-NOF16-NEXT:    setp.nan.ftz.f32 %p2, %r3, %r3;
+; CHECK-NOF16-NEXT:    selp.f32 %r4, %r3, %r2, %p2;
+; CHECK-NOF16-NEXT:    min.ftz.f32 %r5, %r3, %r4;
+; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r5;
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: minimum_float_ftz(
@@ -835,16 +835,18 @@ define float @minimum_float_ftz(float %a, float %b) #1 {
 define double @minimum_double(double %a, double %b) {
 ; CHECK-LABEL: minimum_double(
 ; CHECK:       {
-; CHECK-NEXT:    .reg .pred %p<2>;
-; CHECK-NEXT:    .reg .b64 %rd<5>;
+; CHECK-NEXT:    .reg .pred %p<3>;
+; CHECK-NEXT:    .reg .b64 %rd<6>;
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [minimum_double_param_0];
 ; CHECK-NEXT:    ld.param.b64 %rd2, [minimum_double_param_1];
-; CHECK-NEXT:    setp.nan.f64 %p1, %rd1, %rd2;
-; CHECK-NEXT:    min.f64 %rd3, %rd1, %rd2;
-; CHECK-NEXT:    selp.f64 %rd4, 0d7FF8000000000000, %rd3, %p1;
-; CHECK-NEXT:    st.param.b64 [func_retval0], %rd4;
+; CHECK-NEXT:    setp.nan.f64 %p1, %rd2, %rd2;
+; CHECK-NEXT:    selp.f64 %rd3, %rd2, %rd1, %p1;
+; CHECK-NEXT:    setp.nan.f64 %p2, %rd3, %rd3;
+; CHECK-NEXT:    selp.f64 %rd4, %rd3, %rd2, %p2;
+; CHECK-NEXT:    min.f64 %rd5, %rd3, %rd4;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd5;
 ; CHECK-NEXT:    ret;
   %x = call double @llvm.minimum.f64(double %a, double %b)
   ret double %x
@@ -853,40 +855,38 @@ define double @minimum_double(double %a, double %b) {
 define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) {
 ; CHECK-NOF16-LABEL: minimum_v2half(
 ; CHECK-NOF16:       {
-; CHECK-NOF16-NEXT:    .reg .pred %p<11>;
+; CHECK-NOF16-NEXT:    .reg .pred %p<9>;
 ; CHECK-NOF16-NEXT:    .reg .b16 %rs<15>;
-; CHECK-NOF16-NEXT:    .reg .b32 %r<7>;
+; CHECK-NOF16-NEXT:    .reg .b32 %r<8>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
 ; CHECK-NOF16-NEXT:    ld.param.v2.b16 {%rs1, %rs2}, [minimum_v2half_param_0];
 ; CHECK-NOF16-NEXT:    ld.param.v2.b16 {%rs3, %rs4}, [minimum_v2half_param_1];
 ; CHECK-NOF16-NEXT:    cvt.f32.f16 %r1, %rs4;
-; CHECK-NOF16-NEXT:    cvt.f32.f16 %r2, %rs2;
-; CHECK-NOF16-NEXT:    setp.lt.f32 %p1, %r2, %r1;
-; CHECK-NOF16-NEXT:    selp.b16 %rs5, %rs2, %rs4, %p1;
-; CHECK-NOF16-NEXT:    setp.nan.f32 %p2, %r2, %r1;
-; CHECK-NOF16-NEXT:    selp.b16 %rs6, 0x7E00, %rs5, %p2;
-; CHECK-NOF16-NEXT:    setp.eq.b16 %p3, %rs2, -32768;
-; CHECK-NOF16-NEXT:    selp.b16 %rs7, %rs2, %rs6, %p3;
-; CHECK-NOF16-NEXT:    setp.eq.b16 %p4, %rs4, -32768;
-; CHECK-NOF16-NEXT:    selp.b16 %rs8, %rs4, %rs7, %p4;
-; CHECK-NOF16-NEXT:    cvt.f32.f16 %r3, %rs6;
-; CHECK-NOF16-NEXT:    setp.eq.f32 %p5, %r3, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.b16 %rs9, %rs8, %rs6, %p5;
-; CHECK-NOF16-NEXT:    cvt.f32.f16 %r4, %rs3;
-; CHECK-NOF16-NEXT:    cvt.f32.f16 %r5, %rs1;
-; CHECK-NOF16-NEXT:    setp.lt.f32 %p6, %r5, %r4;
-; CHECK-NOF16-NEXT:    selp.b16 %rs10, %rs1, %rs3, %p6;
-; CHECK-NOF16-NEXT:    setp.nan.f32 %p7, %r5, %r4;
-; CHECK-NOF16-NEXT:    selp.b16 %rs11, 0x7E00, %rs10, %p7;
-; CHECK-NOF16-NEXT:    setp.eq.b16 %p8, %rs1, -32768;
-; CHECK-NOF16-NEXT:    selp.b16 %rs12, %rs1, %rs11, %p8;
-; CHECK-NOF16-NEXT:    setp.eq.b16 %p9, %rs3, -32768;
-; CHECK-NOF16-NEXT:    selp.b16 %rs13, %rs3, %rs12, %p9;
-; CHECK-NOF16-NEXT:    cvt.f32.f16 %r6, %rs11;
-; CHECK-NOF16-NEXT:    setp.eq.f32 %p10, %r6, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.b16 %rs14, %rs13, %rs11, %p10;
-; CHECK-NOF16-NEXT:    st.param.v2.b16 [func_retval0], {%rs14, %rs9};
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
+; CHECK-NOF16-NEXT:    selp.b16 %rs5, %rs4, %rs2, %p1;
+; CHECK-NOF16-NEXT:    cvt.f32.f16 %r2, %rs5;
+; CHECK-NOF16-NEXT:    setp.ltu.f32 %p2, %r2, %r1;
+; CHECK-NOF16-NEXT:    selp.b16 %rs6, %rs5, %rs4, %p2;
+; CHECK-NOF16-NEXT:    cvt.f32.f16 %r3, %rs3;
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p3, %r3, %r3;
+; CHECK-NOF16-NEXT:    selp.b16 %rs7, %rs3, %rs1, %p3;
+; CHECK-NOF16-NEXT:    mov.b32 %r4, {%rs7, %rs5};
+; CHECK-NOF16-NEXT:    mov.b32 {%rs8, %rs9}, %r4;
+; CHECK-NOF16-NEXT:    setp.lt.s16 %p4, %rs9, 0;
+; CHECK-NOF16-NEXT:    selp.b16 %rs10, %rs5, %rs6, %p4;
+; CHECK-NOF16-NEXT:    cvt.f32.f16 %r5, %rs6;
+; CHECK-NOF16-NEXT:    setp.eq.f32 %p5, %r5, 0f00000000;
+; CHECK-NOF16-NEXT:    selp.b16 %rs11, %rs10, %rs6, %p5;
+; CHECK-NOF16-NEXT:    cvt.f32.f16 %r6, %rs7;
+; CHECK-NOF16-NEXT:    setp.ltu.f32 %p6, %r6, %r3;
+; CHECK-NOF16-NEXT:    selp.b16 %rs12, %rs7, %rs3, %p6;
+; CHECK-NOF16-NEXT:    setp.lt.s16 %p7, %rs8, 0;
+; CHECK-NOF16-NEXT:    selp.b16 %rs13, %rs7, %rs12, %p7;
+; CHECK-NOF16-NEXT:    cvt.f32.f16 %r7, %rs12;
+; CHECK-NOF16-NEXT:    setp.eq.f32 %p8, %r7, 0f00000000;
+; CHECK-NOF16-NEXT:    selp.b16 %rs14, %rs13, %rs12, %p8;
+; CHECK-NOF16-NEXT:    st.param.v2.b16 [func_retval0], {%rs14, %rs11};
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: minimum_v2half(
@@ -902,40 +902,38 @@ define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) {
 ;
 ; CHECK-SM80-NOF16-LABEL: minimum_v2half(
 ; CHECK-SM80-NOF16:       {
-; CHECK-SM80-NOF16-NEXT:    .reg .pred %p<11>;
+; CHECK-SM80-NOF16-NEXT:    .reg .pred %p<9>;
 ; CHECK-SM80-NOF16-NEXT:    .reg .b16 %rs<15>;
-; CHECK-SM80-NOF16-NEXT:    .reg .b32 %r<7>;
+; CHECK-SM80-NOF16-NEXT:    .reg .b32 %r<8>;
 ; CHECK-SM80-NOF16-EMPTY:
 ; CHECK-SM80-NOF16-NEXT:  // %bb.0:
 ; CHECK-SM80-NOF16-NEXT:    ld.param.v2.b16 {%rs1, %rs2}, [minimum_v2half_param_0];
 ; CHECK-SM80-NOF16-NEXT:    ld.param.v2.b16 {%rs3, %rs4}, [minimum_v2half_param_1];
 ; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r1, %rs4;
-; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r2, %rs2;
-; CHECK-SM80-NOF16-NEXT:    setp.lt.f32 %p1, %r2, %r1;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs5, %rs2, %rs4, %p1;
-; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p2, %r2, %r1;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs6, 0x7E00, %rs5, %p2;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p3, %rs2, -32768;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs7, %rs2, %rs6, %p3;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p4, %rs4, -32768;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs8, %rs4, %rs7, %p4;
-; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r3, %rs6;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p5, %r3, 0f00000000;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs9, %rs8, %rs6, %p5;
-; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r4, %rs3;
-; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r5, %rs1;
-; CHECK-SM80-NOF16-NEXT:    setp.lt.f32 %p6, %r5, %r4;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs10, %rs1, %rs3, %p6;
-; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p7, %r5, %r4;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs11, 0x7E00, %rs10, %p7;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p8, %rs1, -32768;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs12, %rs1, %rs11, %p8;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p9, %rs3, -32768;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs13, %rs3, %rs12, %p9;
-; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r6, %rs11;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p10, %r6, 0f00000000;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs14, %rs13, %rs11, %p10;
-; CHECK-SM80-NOF16-NEXT:    st.param.v2.b16 [func_retval0], {%rs14, %rs9};
+; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs5, %rs4, %rs2, %p1;
+; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r2, %rs5;
+; CHECK-SM80-NOF16-NEXT:    setp.ltu.f32 %p2, %r2, %r1;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs6, %rs5, %rs4, %p2;
+; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r3, %rs3;
+; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p3, %r3, %r3;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs7, %rs3, %rs1, %p3;
+; CHECK-SM80-NOF16-NEXT:    mov.b32 %r4, {%rs7, %rs5};
+; CHECK-SM80-NOF16-NEXT:    mov.b32 {%rs8, %rs9}, %r4;
+; CHECK-SM80-NOF16-NEXT:    setp.lt.s16 %p4, %rs9, 0;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs10, %rs5, %rs6, %p4;
+; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r5, %rs6;
+; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p5, %r5, 0f00000000;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs11, %rs10, %rs6, %p5;
+; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r6, %rs7;
+; CHECK-SM80-NOF16-NEXT:    setp.ltu.f32 %p6, %r6, %r3;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs12, %rs7, %rs3, %p6;
+; CHECK-SM80-NOF16-NEXT:    setp.lt.s16 %p7, %rs8, 0;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs13, %rs7, %rs12, %p7;
+; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r7, %rs12;
+; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p8, %r7, 0f00000000;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs14, %rs13, %rs12, %p8;
+; CHECK-SM80-NOF16-NEXT:    st.param.v2.b16 [func_retval0], {%rs14, %rs11};
 ; CHECK-SM80-NOF16-NEXT:    ret;
   %x = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
   ret <2 x half> %x
@@ -1121,27 +1119,25 @@ define <2 x half> @maxnum_v2half(<2 x half> %a, <2 x half> %b) {
 define half @maximum_half(half %a, half %b) {
 ; CHECK-NOF16-LABEL: maximum_half(
 ; CHECK-NOF16:       {
-; CHECK-NOF16-NEXT:    .reg .pred %p<6>;
-; CHECK-NOF16-NEXT:    .reg .b16 %rs<8>;
+; CHECK-NOF16-NEXT:    .reg .pred %p<5>;
+; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
 ; CHECK-NOF16-NEXT:    .reg .b32 %r<4>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
 ; CHECK-NOF16-NEXT:    ld.param.b16 %rs1, [maximum_half_param_0];
 ; CHECK-NOF16-NEXT:    ld.param.b16 %rs2, [maximum_half_param_1];
 ; CHECK-NOF16-NEXT:    cvt.f32.f16 %r1, %rs2;
-; CHECK-NOF16-NEXT:    cvt.f32.f16 %r2, %rs1;
-; CHECK-NOF16-NEXT:    setp.gt.f32 %p1, %r2, %r1;
-; CHECK-NOF16-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
-; CHECK-NOF16-NEXT:    setp.nan.f32 %p2, %r2, %r1;
-; CHECK-NOF16-NEXT:    selp.b16 %rs4, 0x7E00, %rs3, %p2;
-; CHECK-NOF16-NEXT:    setp.eq.b16 %p3, %rs1, 0;
-; CHECK-NOF16-NEXT:    selp.b16 %rs5, %rs1, %rs4, %p3;
-; CHECK-NOF16-NEXT:    setp.eq.b16 %p4, %rs2, 0;
-; CHECK-NOF16-NEXT:    selp.b16 %rs6, %rs2, %rs5, %p4;
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
+; CHECK-NOF16-NEXT:    selp.b16 %rs3, %rs2, %rs1, %p1;
+; CHECK-NOF16-NEXT:    cvt.f32.f16 %r2, %rs3;
+; CHECK-NOF16-NEXT:    setp.gtu.f32 %p2, %r2, %r1;
+; CHECK-NOF16-NEXT:    selp.b16 %rs4, %rs3, %rs2, %p2;
+; CHECK-NOF16-NEXT:    setp.gt.s16 %p3, %rs3, -1;
+; CHECK-NOF16-NEXT:    selp.b16 %rs5, %rs3, %rs4, %p3;
 ; CHECK-NOF16-NEXT:    cvt.f32.f16 %r3, %rs4;
-; CHECK-NOF16-NEXT:    setp.eq.f32 %p5, %r3, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.b16 %rs7, %rs6, %rs4, %p5;
-; CHECK-NOF16-NEXT:    st.param.b16 [func_retval0], %rs7;
+; CHECK-NOF16-NEXT:    setp.eq.f32 %p4, %r3, 0f00000000;
+; CHECK-NOF16-NEXT:    selp.b16 %rs6, %rs5, %rs4, %p4;
+; CHECK-NOF16-NEXT:    st.param.b16 [func_retval0], %rs6;
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: maximum_half(
@@ -1157,27 +1153,25 @@ define half @maximum_half(half %a, half %b) {
 ;
 ; CHECK-SM80-NOF16-LABEL: maximum_half(
 ; CHECK-SM80-NOF16:       {
-; CHECK-SM80-NOF16-NEXT:    .reg .pred %p<6>;
-; CHECK-SM80-NOF16-NEXT:    .reg .b16 %rs<8>;
+; CHECK-SM80-NOF16-NEXT:    .reg .pred %p<5>;
+; CHECK-SM80-NOF16-NEXT:    .reg .b16 %rs<7>;
 ; CHECK-SM80-NOF16-NEXT:    .reg .b32 %r<4>;
 ; CHECK-SM80-NOF16-EMPTY:
 ; CHECK-SM80-NOF16-NEXT:  // %bb.0:
 ; CHECK-SM80-NOF16-NEXT:    ld.param.b16 %rs1, [maximum_half_param_0];
 ; CHECK-SM80-NOF16-NEXT:    ld.param.b16 %rs2, [maximum_half_param_1];
 ; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r1, %rs2;
-; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r2, %rs1;
-; CHECK-SM80-NOF16-NEXT:    setp.gt.f32 %p1, %r2, %r1;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
-; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p2, %r2, %r1;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs4, 0x7E00, %rs3, %p2;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p3, %rs1, 0;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs5, %rs1, %rs4, %p3;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p4, %rs2, 0;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs6, %rs2, %rs5, %p4;
+; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs3, %rs2, %rs1, %p1;
+; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r2, %rs3;
+; CHECK-SM80-NOF16-NEXT:    setp.gtu.f32 %p2, %r2, %r1;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs4, %rs3, %rs2, %p2;
+; CHECK-SM80-NOF16-NEXT:    setp.gt.s16 %p3, %rs3, -1;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs5, %rs3, %rs4, %p3;
 ; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r3, %rs4;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p5, %r3, 0f00000000;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs7, %rs6, %rs4, %p5;
-; CHECK-SM80-NOF16-NEXT:    st.param.b16 [func_retval0], %rs7;
+; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p4, %r3, 0f00000000;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs6, %rs5, %rs4, %p4;
+; CHECK-SM80-NOF16-NEXT:    st.param.b16 [func_retval0], %rs6;
 ; CHECK-SM80-NOF16-NEXT:    ret;
   %x = call half @llvm.maximum.f16(half %a, half %b)
   ret half %x
@@ -1192,8 +1186,8 @@ define float @maximum_imm1(float %a) {
 ; CHECK-NOF16-NEXT:  // %bb.0:
 ; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [maximum_imm1_param_0];
 ; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
-; CHECK-NOF16-NEXT:    max.f32 %r2, %r1, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %r3, 0f7FC00000, %r2, %p1;
+; CHECK-NOF16-NEXT:    selp.f32 %r2, %r1, 0f00000000, %p1;
+; CHECK-NOF16-NEXT:    max.f32 %r3, %r1, %r2;
 ; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
 ; CHECK-NOF16-NEXT:    ret;
 ;
@@ -1229,8 +1223,8 @@ define float @maximum_imm2(float %a) {
 ; CHECK-NOF16-NEXT:  // %bb.0:
 ; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [maximum_imm2_param_0];
 ; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
-; CHECK-NOF16-NEXT:    max.f32 %r2, %r1, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %r3, 0f7FC00000, %r2, %p1;
+; CHECK-NOF16-NEXT:    selp.f32 %r2, %r1, 0f00000000, %p1;
+; CHECK-NOF16-NEXT:    max.f32 %r3, %r1, %r2;
 ; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
 ; CHECK-NOF16-NEXT:    ret;
 ;
@@ -1260,16 +1254,18 @@ define float @maximum_imm2(float %a) {
 define float @maximum_float(float %a, float %b) {
 ; CHECK-NOF16-LABEL: maximum_float(
 ; CHECK-NOF16:       {
-; CHECK-NOF16-NEXT:    .reg .pred %p<2>;
-; CHECK-NOF16-NEXT:    .reg .b32 %r<5>;
+; CHECK-NOF16-NEXT:    .reg .pred %p<3>;
+; CHECK-NOF16-NEXT:    .reg .b32 %r<6>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
 ; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [maximum_float_param_0];
 ; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [maximum_float_param_1];
-; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r2;
-; CHECK-NOF16-NEXT:    max.f32 %r3, %r1, %r2;
-; CHECK-NOF16-NEXT:    selp.f32 %r4, 0f7FC00000, %r3, %p1;
-; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r2, %r2;
+; CHECK-NOF16-NEXT:    selp.f32 %r3, %r2, %r1, %p1;
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p2, %r3, %r3;
+; CHECK-NOF16-NEXT:    selp.f32 %r4, %r3, %r2, %p2;
+; CHECK-NOF16-NEXT:    max.f32 %r5, %r3, %r4;
+; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r5;
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: maximum_float(
@@ -1300,16 +1296,18 @@ define float @maximum_float(float %a, float %b) {
 define float @maximum_float_ftz(float %a, float %b) #1 {
 ; CHECK-NOF16-LABEL: maximum_float_ftz(
 ; CHECK-NOF16:       {
-; CHECK-NOF16-NEXT:    .reg .pred %p<2>;
-; CHECK-NOF16-NEXT:    .reg .b32 %r<5>;
+; CHECK-NOF16-NEXT:    .reg .pred %p<3>;
+; CHECK-NOF16-NEXT:    .reg .b32 %r<6>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
 ; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [maximum_float_ftz_param_0];
 ; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [maximum_float_ftz_param_1];
-; CHECK-NOF16-NEXT:    setp.nan.ftz.f32 %p1, %r1, %r2;
-; CHECK-NOF16-NEXT:    max.ftz.f32 %r3, %r1, %r2;
-; CHECK-NOF16-NEXT:    selp.f32 %r4, 0f7FC00000, %r3, %p1;
-; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NOF16-NEXT:    setp.nan.ftz.f32 %p1, %r2, %r2;
+; CHECK-NOF16-NEXT:    selp.f32 %r3, %r2, %r1, %p1;
+; CHECK-NOF16-NEXT:    setp.nan.ftz.f32 %p2, %r3, %r3;
+; CHECK-NOF16-NEXT:    selp.f32 %r4, %r3, %r2, %p2;
+; CHECK-NOF16-NEXT:    max.ftz.f32 %r5, %r3, %r4;
+; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r5;
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: maximum_float_ftz(
@@ -1340,16 +1338,18 @@ define float @maximum_float_ftz(float %a, float %b) #1 {
 define double @maximum_double(double %a, double %b) {
 ; CHECK-LABEL: maximum_double(
 ; CHECK:       {
-; CHECK-NEXT:    .reg .pred %p<2>;
-; CHECK-NEXT:    .reg .b64 %rd<5>;
+; CHECK-NEXT:    .reg .pred %p<3>;
+; CHECK-NEXT:    .reg .b64 %rd<6>;
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [maximum_double_param_0];
 ; CHECK-NEXT:    ld.param.b64 %rd2, [maximum_double_param_1];
-; CHECK-NEXT:    setp.nan.f64 %p1, %rd1, %rd2;
-; CHECK-NEXT:    max.f64 %rd3, %rd1, %rd2;
-; CHECK-NEXT:    selp.f64 %rd4, 0d7FF8000000000000, %rd3, %p1;
-; CHECK-NEXT:    st.param.b64 [func_retval0], %rd4;
+; CHECK-NEXT:    setp.nan.f64 %p1, %rd2, %rd2;
+; CHECK-NEXT:    selp.f64 %rd3, %rd2, %rd1, %p1;
+; CHECK-NEXT:    setp.nan.f64 %p2, %rd3, %rd3;
+; CHECK-NEXT:    selp.f64 %rd4, %rd3, %rd2, %p2;
+; CHECK-NEXT:    max.f64 %rd5, %rd3, %rd4;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd5;
 ; CHECK-NEXT:    ret;
   %x = call double @llvm.maximum.f64(double %a, double %b)
   ret double %x
@@ -1358,40 +1358,38 @@ define double @maximum_double(double %a, double %b) {
 define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) {
 ; CHECK-NOF16-LABEL: maximum_v2half(
 ; CHECK-NOF16:       {
-; CHECK-NOF16-NEXT:    .reg .pred %p<11>;
+; CHECK-NOF16-NEXT:    .reg .pred %p<9>;
 ; CHECK-NOF16-NEXT:    .reg .b16 %rs<15>;
-; CHECK-NOF16-NEXT:    .reg .b32 %r<7>;
+; CHECK-NOF16-NEXT:    .reg .b32 %r<8>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
 ; CHECK-NOF16-NEXT:    ld.param.v2.b16 {%rs1, %rs2}, [maximum_v2half_param_0];
 ; CHECK-NOF16-NEXT:    ld.param.v2.b16 {%rs3, %rs4}, [maximum_v2half_param_1];
 ; CHECK-NOF16-NEXT:    cvt.f32.f16 %r1, %rs4;
-; CHECK-NOF16-NEXT:    cvt.f32.f16 %r2, %rs2;
-; CHECK-NOF16-NEXT:    setp.gt.f32 %p1, %r2, %r1;
-; CHECK-NOF16-NEXT:    selp.b16 %rs5, %rs2, %rs4, %p1;
-; CHECK-NOF16-NEXT:    setp.nan.f32 %p2, %r2, %r1;
-; CHECK-NOF16-NEXT:    selp.b16 %rs6, 0x7E00, %rs5, %p2;
-; CHECK-NOF16-NEXT:    setp.eq.b16 %p3, %rs2, 0;
-; CHECK-NOF16-NEXT:    selp.b16 %rs7, %rs2, %rs6, %p3;
-; CHECK-NOF16-NEXT:    setp.eq.b16 %p4, %rs4, 0;
-; CHECK-NOF16-NEXT:    selp.b16 %rs8, %rs4, %rs7, %p4;
-; CHECK-NOF16-NEXT:    cvt.f32.f16 %r3, %rs6;
-; CHECK-NOF16-NEXT:    setp.eq.f32 %p5, %r3, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.b16 %rs9, %rs8, %rs6, %p5;
-; CHECK-NOF16-NEXT:    cvt.f32.f16 %r4, %rs3;
-; CHECK-NOF16-NEXT:    cvt.f32.f16 %r5, %rs1;
-; CHECK-NOF16-NEXT:    setp.gt.f32 %p6, %r5, %r4;
-; CHECK-NOF16-NEXT:    selp.b16 %rs10, %rs1, %rs3, %p6;
-; CHECK-NOF16-NEXT:    setp.nan.f32 %p7, %r5, %r4;
-; CHECK-NOF16-NEXT:    selp.b16 %rs11, 0x7E00, %rs10, %p7;
-; CHECK-NOF16-NEXT:    setp.eq.b16 %p8, %rs1, 0;
-; CHECK-NOF16-NEXT:    selp.b16 %rs12, %rs1, %rs11, %p8;
-; CHECK-NOF16-NEXT:    setp.eq.b16 %p9, %rs3, 0;
-; CHECK-NOF16-NEXT:    selp.b16 %rs13, %rs3, %rs12, %p9;
-; CHECK-NOF16-NEXT:    cvt.f32.f16 %r6, %rs11;
-; CHECK-NOF16-NEXT:    setp.eq.f32 %p10, %r6, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.b16 %rs14, %rs13, %rs11, %p10;
-; CHECK-NOF16-NEXT:    st.param.v2.b16 [func_retval0], {%rs14, %rs9};
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
+; CHECK-NOF16-NEXT:    selp.b16 %rs5, %rs4, %rs2, %p1;
+; CHECK-NOF16-NEXT:    cvt.f32.f16 %r2, %rs5;
+; CHECK-NOF16-NEXT:    setp.gtu.f32 %p2, %r2, %r1;
+; CHECK-NOF16-NEXT:    selp.b16 %rs6, %rs5, %rs4, %p2;
+; CHECK-NOF16-NEXT:    cvt.f32.f16 %r3, %rs3;
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p3, %r3, %r3;
+; CHECK-NOF16-NEXT:    selp.b16 %rs7, %rs3, %rs1, %p3;
+; CHECK-NOF16-NEXT:    mov.b32 %r4, {%rs7, %rs5};
+; CHECK-NOF16-NEXT:    mov.b32 {%rs8, %rs9}, %r4;
+; CHECK-NOF16-NEXT:    setp.gt.s16 %p4, %rs9, -1;
+; CHECK-NOF16-NEXT:    selp.b16 %rs10, %rs5, %rs6, %p4;
+; CHECK-NOF16-NEXT:    cvt.f32.f16 %r5, %rs6;
+; CHECK-NOF16-NEXT:    setp.eq.f32 %p5, %r5, 0f00000000;
+; CHECK-NOF16-NEXT:    selp.b16 %rs11, %rs10, %rs6, %p5;
+; CHECK-NOF16-NEXT:    cvt.f32.f16 %r6, %rs7;
+; CHECK-NOF16-NEXT:    setp.gtu.f32 %p6, %r6, %r3;
+; CHECK-NOF16-NEXT:    selp.b16 %rs12, %rs7, %rs3, %p6;
+; CHECK-NOF16-NEXT:    setp.gt.s16 %p7, %rs8, -1;
+; CHECK-NOF16-NEXT:    selp.b16 %rs13, %rs7, %rs12, %p7;
+; CHECK-NOF16-NEXT:    cvt.f32.f16 %r7, %rs12;
+; CHECK-NOF16-NEXT:    setp.eq.f32 %p8, %r7, 0f00000000;
+; CHECK-NOF16-NEXT:    selp.b16 %rs14, %rs13, %rs12, %p8;
+; CHECK-NOF16-NEXT:    st.param.v2.b16 [func_retval0], {%rs14, %rs11};
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: maximum_v2half(
@@ -1407,40 +1405,38 @@ define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) {
 ;
 ; CHECK-SM80-NOF16-LABEL: maximum_v2half(
 ; CHECK-SM80-NOF16:       {
-; CHECK-SM80-NOF16-NEXT:    .reg .pred %p<11>;
+; CHECK-SM80-NOF16-NEXT:    .reg .pred %p<9>;
 ; CHECK-SM80-NOF16-NEXT:    .reg .b16 %rs<15>;
-; CHECK-SM80-NOF16-NEXT:    .reg .b32 %r<7>;
+; CHECK-SM80-NOF16-NEXT:    .reg .b32 %r<8>;
 ; CHECK-SM80-NOF16-EMPTY:
 ; CHECK-SM80-NOF16-NEXT:  // %bb.0:
 ; CHECK-SM80-NOF16-NEXT:    ld.param.v2.b16 {%rs1, %rs2}, [maximum_v2half_param_0];
 ; CHECK-SM80-NOF16-NEXT:    ld.param.v2.b16 {%rs3, %rs4}, [maximum_v2half_param_1];
 ; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r1, %rs4;
-; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r2, %rs2;
-; CHECK-SM80-NOF16-NEXT:    setp.gt.f32 %p1, %r2, %r1;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs5, %rs2, %rs4, %p1;
-; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p2, %r2, %r1;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs6, 0x7E00, %rs5, %p2;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p3, %rs2, 0;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs7, %rs2, %rs6, %p3;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p4, %rs4, 0;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs8, %rs4, %rs7, %p4;
-; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r3, %rs6;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p5, %r3, 0f00000000;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs9, %rs8, %rs6, %p5;
-; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r4, %rs3;
-; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r5, %rs1;
-; CHECK-SM80-NOF16-NEXT:    setp.gt.f32 %p6, %r5, %r4;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs10, %rs1, %rs3, %p6;
-; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p7, %r5, %r4;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs11, 0x7E00, %rs10, %p7;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p8, %rs1, 0;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs12, %rs1, %rs11, %p8;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p9, %rs3, 0;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs13, %rs3, %rs12, %p9;
-; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r6, %rs11;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p10, %r6, 0f00000000;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs14, %rs13, %rs11, %p10;
-; CHECK-SM80-NOF16-NEXT:    st.param.v2.b16 [func_retval0], {%rs14, %rs9};
+; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs5, %rs4, %rs2, %p1;
+; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r2, %rs5;
+; CHECK-SM80-NOF16-NEXT:    setp.gtu.f32 %p2, %r2, %r1;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs6, %rs5, %rs4, %p2;
+; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r3, %rs3;
+; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p3, %r3, %r3;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs7, %rs3, %rs1, %p3;
+; CHECK-SM80-NOF16-NEXT:    mov.b32 %r4, {%rs7, %rs5};
+; CHECK-SM80-NOF16-NEXT:    mov.b32 {%rs8, %rs9}, %r4;
+; CHECK-SM80-NOF16-NEXT:    setp.gt.s16 %p4, %rs9, -1;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs10, %rs5, %rs6, %p4;
+; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r5, %rs6;
+; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p5, %r5, 0f00000000;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs11, %rs10, %rs6, %p5;
+; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r6, %rs7;
+; CHECK-SM80-NOF16-NEXT:    setp.gtu.f32 %p6, %r6, %r3;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs12, %rs7, %rs3, %p6;
+; CHECK-SM80-NOF16-NEXT:    setp.gt.s16 %p7, %rs8, -1;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs13, %rs7, %rs12, %p7;
+; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r7, %rs12;
+; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p8, %r7, 0f00000000;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs14, %rs13, %rs12, %p8;
+; CHECK-SM80-NOF16-NEXT:    st.param.v2.b16 [func_retval0], {%rs14, %rs11};
 ; CHECK-SM80-NOF16-NEXT:    ret;
   %x = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
   ret <2 x half> %x
diff --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll
index 6d9eb13376827..17b7f42d8c3ce 100644
--- a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll
+++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll
@@ -4,44 +4,37 @@
 define fp128 @f128_minimum(fp128 %a, fp128 %b) {
 ; CHECK-LABEL: f128_minimum:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xscmpuqp 0, 2, 3
+; CHECK-NEXT:    xscmpuqp 0, 3, 3
 ; CHECK-NEXT:    vmr 4, 2
-; CHECK-NEXT:    bge 0, .LBB0_8
+; CHECK-NEXT:    vmr 2, 3
+; CHECK-NEXT:    bun 0, .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    bun 0, .LBB0_9
+; CHECK-NEXT:    vmr 2, 4
 ; CHECK-NEXT:  .LBB0_2: # %entry
-; CHECK-NEXT:    xststdcqp 0, 2, 4
-; CHECK-NEXT:    bc 4, 2, .LBB0_10
-; CHECK-NEXT:  .LBB0_3: # %entry
-; CHECK-NEXT:    xststdcqp 0, 3, 4
-; CHECK-NEXT:    bc 12, 2, .LBB0_5
-; CHECK-NEXT:  .LBB0_4: # %entry
-; CHECK-NEXT:    vmr 3, 2
-; CHECK-NEXT:  .LBB0_5: # %entry
-; CHECK-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
-; CHECK-NEXT:    addi 3, 3, .LCPI0_1@toc@l
-; CHECK-NEXT:    lxv 34, 0(3)
-; CHECK-NEXT:    xscmpuqp 0, 4, 2
-; CHECK-NEXT:    beq 0, .LBB0_7
-; CHECK-NEXT:  # %bb.6: # %entry
-; CHECK-NEXT:    vmr 3, 4
-; CHECK-NEXT:  .LBB0_7: # %entry
-; CHECK-NEXT:    vmr 2, 3
-; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB0_8: # %entry
+; CHECK-NEXT:    xscmpuqp 0, 2, 3
+; CHECK-NEXT:    vmr 4, 2
+; CHECK-NEXT:    cror 20, 0, 3
+; CHECK-NEXT:    bc 12, 20, .LBB0_4
+; CHECK-NEXT:  # %bb.3: # %entry
 ; CHECK-NEXT:    vmr 4, 3
-; CHECK-NEXT:    bnu 0, .LBB0_2
-; CHECK-NEXT:  .LBB0_9:
+; CHECK-NEXT:  .LBB0_4: # %entry
+; CHECK-NEXT:    xscvqpdpo 3, 2
+; CHECK-NEXT:    xsrsp 0, 35
+; CHECK-NEXT:    xscvdpspn 0, 0
+; CHECK-NEXT:    mffprwz 3, 0
+; CHECK-NEXT:    cmpwi 3, 0
+; CHECK-NEXT:    blt 0, .LBB0_6
+; CHECK-NEXT:  # %bb.5: # %entry
+; CHECK-NEXT:    vmr 2, 4
+; CHECK-NEXT:  .LBB0_6: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI0_0@toc@l
-; CHECK-NEXT:    lxv 36, 0(3)
-; CHECK-NEXT:    xststdcqp 0, 2, 4
-; CHECK-NEXT:    bc 12, 2, .LBB0_3
-; CHECK-NEXT:  .LBB0_10: # %entry
+; CHECK-NEXT:    lxv 35, 0(3)
+; CHECK-NEXT:    xscmpuqp 0, 4, 3
+; CHECK-NEXT:    beqlr 0
+; CHECK-NEXT:  # %bb.7: # %entry
 ; CHECK-NEXT:    vmr 2, 4
-; CHECK-NEXT:    xststdcqp 0, 3, 4
-; CHECK-NEXT:    bc 4, 2, .LBB0_4
-; CHECK-NEXT:    b .LBB0_5
+; CHECK-NEXT:    blr
 entry:
   %m = call fp128 @llvm.minimum.f128(fp128 %a, fp128 %b)
   ret fp128 %m
@@ -50,44 +43,37 @@ entry:
 define fp128 @f128_maximum(fp128 %a, fp128 %b) {
 ; CHECK-LABEL: f128_maximum:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xscmpuqp 0, 2, 3
+; CHECK-NEXT:    xscmpuqp 0, 3, 3
 ; CHECK-NEXT:    vmr 4, 2
-; CHECK-NEXT:    ble 0, .LBB1_8
+; CHECK-NEXT:    vmr 2, 3
+; CHECK-NEXT:    bun 0, .LBB1_2
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    bun 0, .LBB1_9
+; CHECK-NEXT:    vmr 2, 4
 ; CHECK-NEXT:  .LBB1_2: # %entry
-; CHECK-NEXT:    xststdcqp 0, 2, 8
-; CHECK-NEXT:    bc 4, 2, .LBB1_10
-; CHECK-NEXT:  .LBB1_3: # %entry
-; CHECK-NEXT:    xststdcqp 0, 3, 8
-; CHECK-NEXT:    bc 12, 2, .LBB1_5
-; CHECK-NEXT:  .LBB1_4: # %entry
-; CHECK-NEXT:    vmr 3, 2
-; CHECK-NEXT:  .LBB1_5: # %entry
-; CHECK-NEXT:    addis 3, 2, .LCPI1_1@toc@ha
-; CHECK-NEXT:    addi 3, 3, .LCPI1_1@toc@l
-; CHECK-NEXT:    lxv 34, 0(3)
-; CHECK-NEXT:    xscmpuqp 0, 4, 2
-; CHECK-NEXT:    beq 0, .LBB1_7
-; CHECK-NEXT:  # %bb.6: # %entry
-; CHECK-NEXT:    vmr 3, 4
-; CHECK-NEXT:  .LBB1_7: # %entry
-; CHECK-NEXT:    vmr 2, 3
-; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB1_8: # %entry
+; CHECK-NEXT:    xscmpuqp 0, 2, 3
+; CHECK-NEXT:    vmr 4, 2
+; CHECK-NEXT:    cror 20, 1, 3
+; CHECK-NEXT:    bc 12, 20, .LBB1_4
+; CHECK-NEXT:  # %bb.3: # %entry
 ; CHECK-NEXT:    vmr 4, 3
-; CHECK-NEXT:    bnu 0, .LBB1_2
-; CHECK-NEXT:  .LBB1_9:
+; CHECK-NEXT:  .LBB1_4: # %entry
+; CHECK-NEXT:    xscvqpdpo 3, 2
+; CHECK-NEXT:    xsrsp 0, 35
+; CHECK-NEXT:    xscvdpspn 0, 0
+; CHECK-NEXT:    mffprwz 3, 0
+; CHECK-NEXT:    cmpwi 3, -1
+; CHECK-NEXT:    bgt 0, .LBB1_6
+; CHECK-NEXT:  # %bb.5: # %entry
+; CHECK-NEXT:    vmr 2, 4
+; CHECK-NEXT:  .LBB1_6: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI1_0@toc@l
-; CHECK-NEXT:    lxv 36, 0(3)
-; CHECK-NEXT:    xststdcqp 0, 2, 8
-; CHECK-NEXT:    bc 12, 2, .LBB1_3
-; CHECK-NEXT:  .LBB1_10: # %entry
+; CHECK-NEXT:    lxv 35, 0(3)
+; CHECK-NEXT:    xscmpuqp 0, 4, 3
+; CHECK-NEXT:    beqlr 0
+; CHECK-NEXT:  # %bb.7: # %entry
 ; CHECK-NEXT:    vmr 2, 4
-; CHECK-NEXT:    xststdcqp 0, 3, 8
-; CHECK-NEXT:    bc 4, 2, .LBB1_4
-; CHECK-NEXT:    b .LBB1_5
+; CHECK-NEXT:    blr
 entry:
   %m = call fp128 @llvm.maximum.f128(fp128 %a, fp128 %b)
   ret fp128 %m
diff --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
index 39cf136e10d77..7349318bc32fc 100644
--- a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
@@ -6,65 +6,65 @@
 define float @f32_minimum(float %a, float %b) {
 ; NOVSX-LABEL: f32_minimum:
 ; NOVSX:       # %bb.0: # %entry
+; NOVSX-NEXT:    fcmpu 0, 2, 2
+; NOVSX-NEXT:    fmr 0, 1
+; NOVSX-NEXT:    fmr 1, 2
+; NOVSX-NEXT:    bc 12, 3, .LBB0_2
+; NOVSX-NEXT:  # %bb.1: # %entry
+; NOVSX-NEXT:    fmr 1, 0
+; NOVSX-NEXT:  .LBB0_2: # %entry
 ; NOVSX-NEXT:    fcmpu 0, 1, 2
 ; NOVSX-NEXT:    fmr 0, 1
-; NOVSX-NEXT:    stfs 2, -8(1)
 ; NOVSX-NEXT:    stfs 1, -4(1)
-; NOVSX-NEXT:    bc 12, 0, .LBB0_2
-; NOVSX-NEXT:  # %bb.1: # %entry
+; NOVSX-NEXT:    cror 20, 0, 3
+; NOVSX-NEXT:    bc 12, 20, .LBB0_4
+; NOVSX-NEXT:  # %bb.3: # %entry
 ; NOVSX-NEXT:    fmr 0, 2
-; NOVSX-NEXT:  .LBB0_2: # %entry
-; NOVSX-NEXT:    lwz 3, -4(1)
-; NOVSX-NEXT:    bc 4, 3, .LBB0_4
-; NOVSX-NEXT:  # %bb.3:
-; NOVSX-NEXT:    addis 4, 2, .LCPI0_0@toc@ha
-; NOVSX-NEXT:    lfs 0, .LCPI0_0@toc@l(4)
 ; NOVSX-NEXT:  .LBB0_4: # %entry
-; NOVSX-NEXT:    xoris 3, 3, 32768
-; NOVSX-NEXT:    lwz 4, -8(1)
-; NOVSX-NEXT:    cmplwi 3, 0
-; NOVSX-NEXT:    bc 12, 2, .LBB0_6
+; NOVSX-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
+; NOVSX-NEXT:    lfs 2, .LCPI0_0@toc@l(3)
+; NOVSX-NEXT:    lwz 3, -4(1)
+; NOVSX-NEXT:    fcmpu 0, 0, 2
+; NOVSX-NEXT:    bc 4, 2, .LBB0_6
 ; NOVSX-NEXT:  # %bb.5: # %entry
-; NOVSX-NEXT:    fmr 1, 0
+; NOVSX-NEXT:    cmpwi 3, 0
+; NOVSX-NEXT:    bclr 12, 0, 0
 ; NOVSX-NEXT:  .LBB0_6: # %entry
-; NOVSX-NEXT:    xoris 3, 4, 32768
-; NOVSX-NEXT:    cmplwi 3, 0
-; NOVSX-NEXT:    bc 12, 2, .LBB0_8
-; NOVSX-NEXT:  # %bb.7: # %entry
-; NOVSX-NEXT:    fmr 2, 1
-; NOVSX-NEXT:  .LBB0_8: # %entry
-; NOVSX-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
-; NOVSX-NEXT:    lfs 1, .LCPI0_1@toc@l(3)
-; NOVSX-NEXT:    fcmpu 0, 0, 1
-; NOVSX-NEXT:    bc 12, 2, .LBB0_10
-; NOVSX-NEXT:  # %bb.9: # %entry
-; NOVSX-NEXT:    fmr 2, 0
-; NOVSX-NEXT:  .LBB0_10: # %entry
-; NOVSX-NEXT:    fmr 1, 2
+; NOVSX-NEXT:    fmr 1, 0
 ; NOVSX-NEXT:    blr
 ;
 ; VSX-LABEL: f32_minimum:
 ; VSX:       # %bb.0: # %entry
-; VSX-NEXT:    fcmpu 0, 1, 2
+; VSX-NEXT:    fcmpu 0, 2, 2
+; VSX-NEXT:    fmr 0, 2
 ; VSX-NEXT:    bc 12, 3, .LBB0_2
 ; VSX-NEXT:  # %bb.1: # %entry
-; VSX-NEXT:    xsmindp 1, 1, 2
-; VSX-NEXT:    blr
-; VSX-NEXT:  .LBB0_2:
-; VSX-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
-; VSX-NEXT:    lfs 1, .LCPI0_0@toc@l(3)
+; VSX-NEXT:    fmr 0, 1
+; VSX-NEXT:  .LBB0_2: # %entry
+; VSX-NEXT:    fcmpu 0, 0, 0
+; VSX-NEXT:    fmr 1, 0
+; VSX-NEXT:    bc 12, 3, .LBB0_4
+; VSX-NEXT:  # %bb.3: # %entry
+; VSX-NEXT:    fmr 1, 2
+; VSX-NEXT:  .LBB0_4: # %entry
+; VSX-NEXT:    xsmindp 1, 0, 1
 ; VSX-NEXT:    blr
 ;
 ; AIX-LABEL: f32_minimum:
 ; AIX:       # %bb.0: # %entry
-; AIX-NEXT:    fcmpu 0, 1, 2
+; AIX-NEXT:    fcmpu 0, 2, 2
+; AIX-NEXT:    fmr 0, 2
 ; AIX-NEXT:    bc 12, 3, L..BB0_2
 ; AIX-NEXT:  # %bb.1: # %entry
-; AIX-NEXT:    xsmindp 1, 1, 2
-; AIX-NEXT:    blr
-; AIX-NEXT:  L..BB0_2:
-; AIX-NEXT:    ld 3, L..C0(2) # %const.0
-; AIX-NEXT:    lfs 1, 0(3)
+; AIX-NEXT:    fmr 0, 1
+; AIX-NEXT:  L..BB0_2: # %entry
+; AIX-NEXT:    fcmpu 0, 0, 0
+; AIX-NEXT:    fmr 1, 0
+; AIX-NEXT:    bc 12, 3, L..BB0_4
+; AIX-NEXT:  # %bb.3: # %entry
+; AIX-NEXT:    fmr 1, 2
+; AIX-NEXT:  L..BB0_4: # %entry
+; AIX-NEXT:    xsmindp 1, 0, 1
 ; AIX-NEXT:    blr
 entry:
   %m = call float @llvm.minimum.f32(float %a, float %b)
@@ -74,63 +74,65 @@ entry:
 define float @f32_maximum(float %a, float %b) {
 ; NOVSX-LABEL: f32_maximum:
 ; NOVSX:       # %bb.0: # %entry
+; NOVSX-NEXT:    fcmpu 0, 2, 2
+; NOVSX-NEXT:    fmr 0, 1
+; NOVSX-NEXT:    fmr 1, 2
+; NOVSX-NEXT:    bc 12, 3, .LBB1_2
+; NOVSX-NEXT:  # %bb.1: # %entry
+; NOVSX-NEXT:    fmr 1, 0
+; NOVSX-NEXT:  .LBB1_2: # %entry
 ; NOVSX-NEXT:    fcmpu 0, 1, 2
 ; NOVSX-NEXT:    fmr 0, 1
-; NOVSX-NEXT:    stfs 2, -8(1)
 ; NOVSX-NEXT:    stfs 1, -4(1)
-; NOVSX-NEXT:    bc 12, 1, .LBB1_2
-; NOVSX-NEXT:  # %bb.1: # %entry
+; NOVSX-NEXT:    cror 20, 1, 3
+; NOVSX-NEXT:    bc 12, 20, .LBB1_4
+; NOVSX-NEXT:  # %bb.3: # %entry
 ; NOVSX-NEXT:    fmr 0, 2
-; NOVSX-NEXT:  .LBB1_2: # %entry
-; NOVSX-NEXT:    lwz 3, -4(1)
-; NOVSX-NEXT:    bc 4, 3, .LBB1_4
-; NOVSX-NEXT:  # %bb.3:
-; NOVSX-NEXT:    addis 4, 2, .LCPI1_0@toc@ha
-; NOVSX-NEXT:    lfs 0, .LCPI1_0@toc@l(4)
 ; NOVSX-NEXT:  .LBB1_4: # %entry
-; NOVSX-NEXT:    cmpwi 3, 0
-; NOVSX-NEXT:    lwz 4, -8(1)
-; NOVSX-NEXT:    bc 12, 2, .LBB1_6
+; NOVSX-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
+; NOVSX-NEXT:    lfs 2, .LCPI1_0@toc@l(3)
+; NOVSX-NEXT:    lwz 3, -4(1)
+; NOVSX-NEXT:    fcmpu 0, 0, 2
+; NOVSX-NEXT:    bc 4, 2, .LBB1_6
 ; NOVSX-NEXT:  # %bb.5: # %entry
-; NOVSX-NEXT:    fmr 1, 0
+; NOVSX-NEXT:    cmpwi 3, -1
+; NOVSX-NEXT:    bclr 12, 1, 0
 ; NOVSX-NEXT:  .LBB1_6: # %entry
-; NOVSX-NEXT:    cmpwi 4, 0
-; NOVSX-NEXT:    bc 12, 2, .LBB1_8
-; NOVSX-NEXT:  # %bb.7: # %entry
-; NOVSX-NEXT:    fmr 2, 1
-; NOVSX-NEXT:  .LBB1_8: # %entry
-; NOVSX-NEXT:    addis 3, 2, .LCPI1_1@toc@ha
-; NOVSX-NEXT:    lfs 1, .LCPI1_1@toc@l(3)
-; NOVSX-NEXT:    fcmpu 0, 0, 1
-; NOVSX-NEXT:    bc 12, 2, .LBB1_10
-; NOVSX-NEXT:  # %bb.9: # %entry
-; NOVSX-NEXT:    fmr 2, 0
-; NOVSX-NEXT:  .LBB1_10: # %entry
-; NOVSX-NEXT:    fmr 1, 2
+; NOVSX-NEXT:    fmr 1, 0
 ; NOVSX-NEXT:    blr
 ;
 ; VSX-LABEL: f32_maximum:
 ; VSX:       # %bb.0: # %entry
-; VSX-NEXT:    fcmpu 0, 1, 2
+; VSX-NEXT:    fcmpu 0, 2, 2
+; VSX-NEXT:    fmr 0, 2
 ; VSX-NEXT:    bc 12, 3, .LBB1_2
 ; VSX-NEXT:  # %bb.1: # %entry
-; VSX-NEXT:    xsmaxdp 1, 1, 2
-; VSX-NEXT:    blr
-; VSX-NEXT:  .LBB1_2:
-; VSX-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
-; VSX-NEXT:    lfs 1, .LCPI1_0@toc@l(3)
+; VSX-NEXT:    fmr 0, 1
+; VSX-NEXT:  .LBB1_2: # %entry
+; VSX-NEXT:    fcmpu 0, 0, 0
+; VSX-NEXT:    fmr 1, 0
+; VSX-NEXT:    bc 12, 3, .LBB1_4
+; VSX-NEXT:  # %bb.3: # %entry
+; VSX-NEXT:    fmr 1, 2
+; VSX-NEXT:  .LBB1_4: # %entry
+; VSX-NEXT:    xsmaxdp 1, 0, 1
 ; VSX-NEXT:    blr
 ;
 ; AIX-LABEL: f32_maximum:
 ; AIX:       # %bb.0: # %entry
-; AIX-NEXT:    fcmpu 0, 1, 2
+; AIX-NEXT:    fcmpu 0, 2, 2
+; AIX-NEXT:    fmr 0, 2
 ; AIX-NEXT:    bc 12, 3, L..BB1_2
 ; AIX-NEXT:  # %bb.1: # %entry
-; AIX-NEXT:    xsmaxdp 1, 1, 2
-; AIX-NEXT:    blr
-; AIX-NEXT:  L..BB1_2:
-; AIX-NEXT:    ld 3, L..C1(2) # %const.0
-; AIX-NEXT:    lfs 1, 0(3)
+; AIX-NEXT:    fmr 0, 1
+; AIX-NEXT:  L..BB1_2: # %entry
+; AIX-NEXT:    fcmpu 0, 0, 0
+; AIX-NEXT:    fmr 1, 0
+; AIX-NEXT:    bc 12, 3, L..BB1_4
+; AIX-NEXT:  # %bb.3: # %entry
+; AIX-NEXT:    fmr 1, 2
+; AIX-NEXT:  L..BB1_4: # %entry
+; AIX-NEXT:    xsmaxdp 1, 0, 1
 ; AIX-NEXT:    blr
 entry:
   %m = call float @llvm.maximum.f32(float %a, float %b)
@@ -140,65 +142,65 @@ entry:
 define double @f64_minimum(double %a, double %b) {
 ; NOVSX-LABEL: f64_minimum:
 ; NOVSX:       # %bb.0: # %entry
+; NOVSX-NEXT:    fcmpu 0, 2, 2
+; NOVSX-NEXT:    fmr 0, 1
+; NOVSX-NEXT:    fmr 1, 2
+; NOVSX-NEXT:    bc 12, 3, .LBB2_2
+; NOVSX-NEXT:  # %bb.1: # %entry
+; NOVSX-NEXT:    fmr 1, 0
+; NOVSX-NEXT:  .LBB2_2: # %entry
 ; NOVSX-NEXT:    fcmpu 0, 1, 2
 ; NOVSX-NEXT:    fmr 0, 1
-; NOVSX-NEXT:    stfd 2, -16(1)
 ; NOVSX-NEXT:    stfd 1, -8(1)
-; NOVSX-NEXT:    bc 12, 0, .LBB2_2
-; NOVSX-NEXT:  # %bb.1: # %entry
+; NOVSX-NEXT:    cror 20, 0, 3
+; NOVSX-NEXT:    bc 12, 20, .LBB2_4
+; NOVSX-NEXT:  # %bb.3: # %entry
 ; NOVSX-NEXT:    fmr 0, 2
-; NOVSX-NEXT:  .LBB2_2: # %entry
-; NOVSX-NEXT:    ld 3, -8(1)
-; NOVSX-NEXT:    bc 4, 3, .LBB2_4
-; NOVSX-NEXT:  # %bb.3:
-; NOVSX-NEXT:    addis 4, 2, .LCPI2_0@toc@ha
-; NOVSX-NEXT:    lfs 0, .LCPI2_0@toc@l(4)
 ; NOVSX-NEXT:  .LBB2_4: # %entry
-; NOVSX-NEXT:    li 5, 1
-; NOVSX-NEXT:    ld 4, -16(1)
-; NOVSX-NEXT:    rldic 5, 5, 63, 0
-; NOVSX-NEXT:    cmpd 3, 5
-; NOVSX-NEXT:    bc 12, 2, .LBB2_6
+; NOVSX-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
+; NOVSX-NEXT:    lfs 2, .LCPI2_0@toc@l(3)
+; NOVSX-NEXT:    ld 3, -8(1)
+; NOVSX-NEXT:    fcmpu 0, 0, 2
+; NOVSX-NEXT:    bc 4, 2, .LBB2_6
 ; NOVSX-NEXT:  # %bb.5: # %entry
-; NOVSX-NEXT:    fmr 1, 0
+; NOVSX-NEXT:    cmpdi 3, 0
+; NOVSX-NEXT:    bclr 12, 0, 0
 ; NOVSX-NEXT:  .LBB2_6: # %entry
-; NOVSX-NEXT:    cmpd 4, 5
-; NOVSX-NEXT:    bc 12, 2, .LBB2_8
-; NOVSX-NEXT:  # %bb.7: # %entry
-; NOVSX-NEXT:    fmr 2, 1
-; NOVSX-NEXT:  .LBB2_8: # %entry
-; NOVSX-NEXT:    addis 3, 2, .LCPI2_1@toc@ha
-; NOVSX-NEXT:    lfs 1, .LCPI2_1@toc@l(3)
-; NOVSX-NEXT:    fcmpu 0, 0, 1
-; NOVSX-NEXT:    bc 12, 2, .LBB2_10
-; NOVSX-NEXT:  # %bb.9: # %entry
-; NOVSX-NEXT:    fmr 2, 0
-; NOVSX-NEXT:  .LBB2_10: # %entry
-; NOVSX-NEXT:    fmr 1, 2
+; NOVSX-NEXT:    fmr 1, 0
 ; NOVSX-NEXT:    blr
 ;
 ; VSX-LABEL: f64_minimum:
 ; VSX:       # %bb.0: # %entry
-; VSX-NEXT:    fcmpu 0, 1, 2
+; VSX-NEXT:    fcmpu 0, 2, 2
+; VSX-NEXT:    fmr 0, 2
 ; VSX-NEXT:    bc 12, 3, .LBB2_2
 ; VSX-NEXT:  # %bb.1: # %entry
-; VSX-NEXT:    xsmindp 1, 1, 2
-; VSX-NEXT:    blr
-; VSX-NEXT:  .LBB2_2:
-; VSX-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
-; VSX-NEXT:    lfs 1, .LCPI2_0@toc@l(3)
+; VSX-NEXT:    fmr 0, 1
+; VSX-NEXT:  .LBB2_2: # %entry
+; VSX-NEXT:    fcmpu 0, 0, 0
+; VSX-NEXT:    fmr 1, 0
+; VSX-NEXT:    bc 12, 3, .LBB2_4
+; VSX-NEXT:  # %bb.3: # %entry
+; VSX-NEXT:    fmr 1, 2
+; VSX-NEXT:  .LBB2_4: # %entry
+; VSX-NEXT:    xsmindp 1, 0, 1
 ; VSX-NEXT:    blr
 ;
 ; AIX-LABEL: f64_minimum:
 ; AIX:       # %bb.0: # %entry
-; AIX-NEXT:    fcmpu 0, 1, 2
+; AIX-NEXT:    fcmpu 0, 2, 2
+; AIX-NEXT:    fmr 0, 2
 ; AIX-NEXT:    bc 12, 3, L..BB2_2
 ; AIX-NEXT:  # %bb.1: # %entry
-; AIX-NEXT:    xsmindp 1, 1, 2
-; AIX-NEXT:    blr
-; AIX-NEXT:  L..BB2_2:
-; AIX-NEXT:    ld 3, L..C2(2) # %const.0
-; AIX-NEXT:    lfs 1, 0(3)
+; AIX-NEXT:    fmr 0, 1
+; AIX-NEXT:  L..BB2_2: # %entry
+; AIX-NEXT:    fcmpu 0, 0, 0
+; AIX-NEXT:    fmr 1, 0
+; AIX-NEXT:    bc 12, 3, L..BB2_4
+; AIX-NEXT:  # %bb.3: # %entry
+; AIX-NEXT:    fmr 1, 2
+; AIX-NEXT:  L..BB2_4: # %entry
+; AIX-NEXT:    xsmindp 1, 0, 1
 ; AIX-NEXT:    blr
 entry:
   %m = call double @llvm.minimum.f64(double %a, double %b)
@@ -208,63 +210,65 @@ entry:
 define double @f64_maximum(double %a, double %b) {
 ; NOVSX-LABEL: f64_maximum:
 ; NOVSX:       # %bb.0: # %entry
+; NOVSX-NEXT:    fcmpu 0, 2, 2
+; NOVSX-NEXT:    fmr 0, 1
+; NOVSX-NEXT:    fmr 1, 2
+; NOVSX-NEXT:    bc 12, 3, .LBB3_2
+; NOVSX-NEXT:  # %bb.1: # %entry
+; NOVSX-NEXT:    fmr 1, 0
+; NOVSX-NEXT:  .LBB3_2: # %entry
 ; NOVSX-NEXT:    fcmpu 0, 1, 2
 ; NOVSX-NEXT:    fmr 0, 1
-; NOVSX-NEXT:    stfd 2, -16(1)
 ; NOVSX-NEXT:    stfd 1, -8(1)
-; NOVSX-NEXT:    bc 12, 1, .LBB3_2
-; NOVSX-NEXT:  # %bb.1: # %entry
+; NOVSX-NEXT:    cror 20, 1, 3
+; NOVSX-NEXT:    bc 12, 20, .LBB3_4
+; NOVSX-NEXT:  # %bb.3: # %entry
 ; NOVSX-NEXT:    fmr 0, 2
-; NOVSX-NEXT:  .LBB3_2: # %entry
-; NOVSX-NEXT:    ld 3, -8(1)
-; NOVSX-NEXT:    bc 4, 3, .LBB3_4
-; NOVSX-NEXT:  # %bb.3:
-; NOVSX-NEXT:    addis 4, 2, .LCPI3_0@toc@ha
-; NOVSX-NEXT:    lfs 0, .LCPI3_0@toc@l(4)
 ; NOVSX-NEXT:  .LBB3_4: # %entry
-; NOVSX-NEXT:    cmpdi 3, 0
-; NOVSX-NEXT:    ld 4, -16(1)
-; NOVSX-NEXT:    bc 12, 2, .LBB3_6
+; NOVSX-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
+; NOVSX-NEXT:    lfs 2, .LCPI3_0@toc@l(3)
+; NOVSX-NEXT:    ld 3, -8(1)
+; NOVSX-NEXT:    fcmpu 0, 0, 2
+; NOVSX-NEXT:    bc 4, 2, .LBB3_6
 ; NOVSX-NEXT:  # %bb.5: # %entry
-; NOVSX-NEXT:    fmr 1, 0
+; NOVSX-NEXT:    cmpdi 3, -1
+; NOVSX-NEXT:    bclr 12, 1, 0
 ; NOVSX-NEXT:  .LBB3_6: # %entry
-; NOVSX-NEXT:    cmpdi 4, 0
-; NOVSX-NEXT:    bc 12, 2, .LBB3_8
-; NOVSX-NEXT:  # %bb.7: # %entry
-; NOVSX-NEXT:    fmr 2, 1
-; NOVSX-NEXT:  .LBB3_8: # %entry
-; NOVSX-NEXT:    addis 3, 2, .LCPI3_1@toc@ha
-; NOVSX-NEXT:    lfs 1, .LCPI3_1@toc@l(3)
-; NOVSX-NEXT:    fcmpu 0, 0, 1
-; NOVSX-NEXT:    bc 12, 2, .LBB3_10
-; NOVSX-NEXT:  # %bb.9: # %entry
-; NOVSX-NEXT:    fmr 2, 0
-; NOVSX-NEXT:  .LBB3_10: # %entry
-; NOVSX-NEXT:    fmr 1, 2
+; NOVSX-NEXT:    fmr 1, 0
 ; NOVSX-NEXT:    blr
 ;
 ; VSX-LABEL: f64_maximum:
 ; VSX:       # %bb.0: # %entry
-; VSX-NEXT:    fcmpu 0, 1, 2
+; VSX-NEXT:    fcmpu 0, 2, 2
+; VSX-NEXT:    fmr 0, 2
 ; VSX-NEXT:    bc 12, 3, .LBB3_2
 ; VSX-NEXT:  # %bb.1: # %entry
-; VSX-NEXT:    xsmaxdp 1, 1, 2
-; VSX-NEXT:    blr
-; VSX-NEXT:  .LBB3_2:
-; VSX-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
-; VSX-NEXT:    lfs 1, .LCPI3_0@toc@l(3)
+; VSX-NEXT:    fmr 0, 1
+; VSX-NEXT:  .LBB3_2: # %entry
+; VSX-NEXT:    fcmpu 0, 0, 0
+; VSX-NEXT:    fmr 1, 0
+; VSX-NEXT:    bc 12, 3, .LBB3_4
+; VSX-NEXT:  # %bb.3: # %entry
+; VSX-NEXT:    fmr 1, 2
+; VSX-NEXT:  .LBB3_4: # %entry
+; VSX-NEXT:    xsmaxdp 1, 0, 1
 ; VSX-NEXT:    blr
 ;
 ; AIX-LABEL: f64_maximum:
 ; AIX:       # %bb.0: # %entry
-; AIX-NEXT:    fcmpu 0, 1, 2
+; AIX-NEXT:    fcmpu 0, 2, 2
+; AIX-NEXT:    fmr 0, 2
 ; AIX-NEXT:    bc 12, 3, L..BB3_2
 ; AIX-NEXT:  # %bb.1: # %entry
-; AIX-NEXT:    xsmaxdp 1, 1, 2
-; AIX-NEXT:    blr
-; AIX-NEXT:  L..BB3_2:
-; AIX-NEXT:    ld 3, L..C3(2) # %const.0
-; AIX-NEXT:    lfs 1, 0(3)
+; AIX-NEXT:    fmr 0, 1
+; AIX-NEXT:  L..BB3_2: # %entry
+; AIX-NEXT:    fcmpu 0, 0, 0
+; AIX-NEXT:    fmr 1, 0
+; AIX-NEXT:    bc 12, 3, L..BB3_4
+; AIX-NEXT:  # %bb.3: # %entry
+; AIX-NEXT:    fmr 1, 2
+; AIX-NEXT:  L..BB3_4: # %entry
+; AIX-NEXT:    xsmaxdp 1, 0, 1
 ; AIX-NEXT:    blr
 entry:
   %m = call double @llvm.maximum.f64(double %a, double %b)
@@ -274,53 +278,62 @@ entry:
 define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) {
 ; NOVSX-LABEL: v4f32_minimum:
 ; NOVSX:       # %bb.0: # %entry
-; NOVSX-NEXT:    vcmpeqfp 0, 3, 3
-; NOVSX-NEXT:    vcmpeqfp 1, 2, 2
-; NOVSX-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
-; NOVSX-NEXT:    addi 3, 3, .LCPI4_0@toc@l
-; NOVSX-NEXT:    vnot 0, 0
-; NOVSX-NEXT:    vnot 1, 1
-; NOVSX-NEXT:    vspltisb 4, -1
-; NOVSX-NEXT:    vcmpgtfp 5, 3, 2
-; NOVSX-NEXT:    vslw 4, 4, 4
-; NOVSX-NEXT:    vor 0, 1, 0
-; NOVSX-NEXT:    lvx 1, 0, 3
-; NOVSX-NEXT:    vsel 5, 3, 2, 5
-; NOVSX-NEXT:    vsel 5, 5, 1, 0
-; NOVSX-NEXT:    vcmpequw 0, 2, 4
-; NOVSX-NEXT:    vcmpequw 4, 3, 4
-; NOVSX-NEXT:    vsel 2, 5, 2, 0
+; NOVSX-NEXT:    vcmpeqfp 4, 3, 3
+; NOVSX-NEXT:    addi 3, 1, -48
+; NOVSX-NEXT:    vnot 4, 4
+; NOVSX-NEXT:    stvx 3, 0, 3
+; NOVSX-NEXT:    addi 3, 1, -32
 ; NOVSX-NEXT:    vsel 2, 2, 3, 4
 ; NOVSX-NEXT:    vxor 3, 3, 3
+; NOVSX-NEXT:    stvx 2, 0, 3
+; NOVSX-NEXT:    vcmpgtsw 4, 3, 2
+; NOVSX-NEXT:    lwz 3, -36(1)
+; NOVSX-NEXT:    lwz 4, -20(1)
+; NOVSX-NEXT:    cmplw 4, 3
+; NOVSX-NEXT:    isellt 3, 4, 3
+; NOVSX-NEXT:    lwz 4, -24(1)
+; NOVSX-NEXT:    stw 3, -4(1)
+; NOVSX-NEXT:    lwz 3, -40(1)
+; NOVSX-NEXT:    cmplw 4, 3
+; NOVSX-NEXT:    isellt 3, 4, 3
+; NOVSX-NEXT:    lwz 4, -28(1)
+; NOVSX-NEXT:    stw 3, -8(1)
+; NOVSX-NEXT:    lwz 3, -44(1)
+; NOVSX-NEXT:    cmplw 4, 3
+; NOVSX-NEXT:    isellt 3, 4, 3
+; NOVSX-NEXT:    lwz 4, -32(1)
+; NOVSX-NEXT:    stw 3, -12(1)
+; NOVSX-NEXT:    lwz 3, -48(1)
+; NOVSX-NEXT:    cmplw 4, 3
+; NOVSX-NEXT:    isellt 3, 4, 3
+; NOVSX-NEXT:    stw 3, -16(1)
+; NOVSX-NEXT:    addi 3, 1, -16
+; NOVSX-NEXT:    lvx 5, 0, 3
 ; NOVSX-NEXT:    vcmpeqfp 3, 5, 3
+; NOVSX-NEXT:    vsel 2, 5, 2, 4
 ; NOVSX-NEXT:    vsel 2, 5, 2, 3
 ; NOVSX-NEXT:    blr
 ;
 ; VSX-LABEL: v4f32_minimum:
 ; VSX:       # %bb.0: # %entry
-; VSX-NEXT:    xvcmpeqsp 1, 35, 35
-; VSX-NEXT:    xvcmpeqsp 2, 34, 34
-; VSX-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
-; VSX-NEXT:    addi 3, 3, .LCPI4_0@toc@l
+; VSX-NEXT:    xvcmpeqsp 0, 35, 35
+; VSX-NEXT:    xxlnor 0, 0, 0
+; VSX-NEXT:    xxsel 0, 34, 35, 0
+; VSX-NEXT:    xvcmpeqsp 1, 0, 0
 ; VSX-NEXT:    xxlnor 1, 1, 1
-; VSX-NEXT:    xxlnor 2, 2, 2
-; VSX-NEXT:    xvminsp 0, 34, 35
-; VSX-NEXT:    xxlor 1, 2, 1
-; VSX-NEXT:    lxvd2x 2, 0, 3
-; VSX-NEXT:    xxsel 34, 0, 2, 1
+; VSX-NEXT:    xxsel 1, 35, 0, 1
+; VSX-NEXT:    xvminsp 34, 0, 1
 ; VSX-NEXT:    blr
 ;
 ; AIX-LABEL: v4f32_minimum:
 ; AIX:       # %bb.0: # %entry
-; AIX-NEXT:    xvcmpeqsp 1, 35, 35
-; AIX-NEXT:    xvcmpeqsp 2, 34, 34
-; AIX-NEXT:    ld 3, L..C4(2) # %const.0
-; AIX-NEXT:    xvminsp 0, 34, 35
+; AIX-NEXT:    xvcmpeqsp 0, 35, 35
+; AIX-NEXT:    xxlnor 0, 0, 0
+; AIX-NEXT:    xxsel 0, 34, 35, 0
+; AIX-NEXT:    xvcmpeqsp 1, 0, 0
 ; AIX-NEXT:    xxlnor 1, 1, 1
-; AIX-NEXT:    xxlnor 2, 2, 2
-; AIX-NEXT:    xxlor 1, 2, 1
-; AIX-NEXT:    lxvw4x 2, 0, 3
-; AIX-NEXT:    xxsel 34, 0, 2, 1
+; AIX-NEXT:    xxsel 1, 35, 0, 1
+; AIX-NEXT:    xvminsp 34, 0, 1
 ; AIX-NEXT:    blr
 entry:
   %m = call <4 x float> @llvm.minimum.v4f32(<4 x float> %a, <4 x float> %b)
@@ -330,51 +343,63 @@ entry:
 define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) {
 ; NOVSX-LABEL: v4f32_maximum:
 ; NOVSX:       # %bb.0: # %entry
-; NOVSX-NEXT:    vcmpeqfp 5, 3, 3
-; NOVSX-NEXT:    vcmpeqfp 0, 2, 2
-; NOVSX-NEXT:    addis 3, 2, .LCPI5_0@toc@ha
-; NOVSX-NEXT:    addi 3, 3, .LCPI5_0@toc@l
-; NOVSX-NEXT:    vnot 5, 5
-; NOVSX-NEXT:    vnot 0, 0
-; NOVSX-NEXT:    vcmpgtfp 4, 2, 3
-; NOVSX-NEXT:    vor 5, 0, 5
-; NOVSX-NEXT:    lvx 0, 0, 3
-; NOVSX-NEXT:    vsel 4, 3, 2, 4
-; NOVSX-NEXT:    vsel 4, 4, 0, 5
-; NOVSX-NEXT:    vxor 5, 5, 5
-; NOVSX-NEXT:    vcmpequw 0, 2, 5
-; NOVSX-NEXT:    vsel 2, 4, 2, 0
-; NOVSX-NEXT:    vcmpequw 0, 3, 5
-; NOVSX-NEXT:    vsel 2, 2, 3, 0
-; NOVSX-NEXT:    vcmpeqfp 3, 4, 5
-; NOVSX-NEXT:    vsel 2, 4, 2, 3
+; NOVSX-NEXT:    vcmpeqfp 4, 3, 3
+; NOVSX-NEXT:    addi 3, 1, -48
+; NOVSX-NEXT:    vnot 4, 4
+; NOVSX-NEXT:    stvx 3, 0, 3
+; NOVSX-NEXT:    addi 3, 1, -32
+; NOVSX-NEXT:    vsel 2, 2, 3, 4
+; NOVSX-NEXT:    vxor 3, 3, 3
+; NOVSX-NEXT:    stvx 2, 0, 3
+; NOVSX-NEXT:    vcmpgtsw 4, 3, 2
+; NOVSX-NEXT:    lwz 3, -36(1)
+; NOVSX-NEXT:    lwz 4, -20(1)
+; NOVSX-NEXT:    vnot 4, 4
+; NOVSX-NEXT:    cmplw 4, 3
+; NOVSX-NEXT:    iselgt 3, 4, 3
+; NOVSX-NEXT:    lwz 4, -24(1)
+; NOVSX-NEXT:    stw 3, -4(1)
+; NOVSX-NEXT:    lwz 3, -40(1)
+; NOVSX-NEXT:    cmplw 4, 3
+; NOVSX-NEXT:    iselgt 3, 4, 3
+; NOVSX-NEXT:    lwz 4, -28(1)
+; NOVSX-NEXT:    stw 3, -8(1)
+; NOVSX-NEXT:    lwz 3, -44(1)
+; NOVSX-NEXT:    cmplw 4, 3
+; NOVSX-NEXT:    iselgt 3, 4, 3
+; NOVSX-NEXT:    lwz 4, -32(1)
+; NOVSX-NEXT:    stw 3, -12(1)
+; NOVSX-NEXT:    lwz 3, -48(1)
+; NOVSX-NEXT:    cmplw 4, 3
+; NOVSX-NEXT:    iselgt 3, 4, 3
+; NOVSX-NEXT:    stw 3, -16(1)
+; NOVSX-NEXT:    addi 3, 1, -16
+; NOVSX-NEXT:    lvx 5, 0, 3
+; NOVSX-NEXT:    vcmpeqfp 3, 5, 3
+; NOVSX-NEXT:    vsel 2, 5, 2, 4
+; NOVSX-NEXT:    vsel 2, 5, 2, 3
 ; NOVSX-NEXT:    blr
 ;
 ; VSX-LABEL: v4f32_maximum:
 ; VSX:       # %bb.0: # %entry
-; VSX-NEXT:    xvcmpeqsp 1, 35, 35
-; VSX-NEXT:    xvcmpeqsp 2, 34, 34
-; VSX-NEXT:    addis 3, 2, .LCPI5_0@toc@ha
-; VSX-NEXT:    addi 3, 3, .LCPI5_0@toc@l
+; VSX-NEXT:    xvcmpeqsp 0, 35, 35
+; VSX-NEXT:    xxlnor 0, 0, 0
+; VSX-NEXT:    xxsel 0, 34, 35, 0
+; VSX-NEXT:    xvcmpeqsp 1, 0, 0
 ; VSX-NEXT:    xxlnor 1, 1, 1
-; VSX-NEXT:    xxlnor 2, 2, 2
-; VSX-NEXT:    xvmaxsp 0, 34, 35
-; VSX-NEXT:    xxlor 1, 2, 1
-; VSX-NEXT:    lxvd2x 2, 0, 3
-; VSX-NEXT:    xxsel 34, 0, 2, 1
+; VSX-NEXT:    xxsel 1, 35, 0, 1
+; VSX-NEXT:    xvmaxsp 34, 0, 1
 ; VSX-NEXT:    blr
 ;
 ; AIX-LABEL: v4f32_maximum:
 ; AIX:       # %bb.0: # %entry
-; AIX-NEXT:    xvcmpeqsp 1, 35, 35
-; AIX-NEXT:    xvcmpeqsp 2, 34, 34
-; AIX-NEXT:    ld 3, L..C5(2) # %const.0
-; AIX-NEXT:    xvmaxsp 0, 34, 35
+; AIX-NEXT:    xvcmpeqsp 0, 35, 35
+; AIX-NEXT:    xxlnor 0, 0, 0
+; AIX-NEXT:    xxsel 0, 34, 35, 0
+; AIX-NEXT:    xvcmpeqsp 1, 0, 0
 ; AIX-NEXT:    xxlnor 1, 1, 1
-; AIX-NEXT:    xxlnor 2, 2, 2
-; AIX-NEXT:    xxlor 1, 2, 1
-; AIX-NEXT:    lxvw4x 2, 0, 3
-; AIX-NEXT:    xxsel 34, 0, 2, 1
+; AIX-NEXT:    xxsel 1, 35, 0, 1
+; AIX-NEXT:    xvmaxsp 34, 0, 1
 ; AIX-NEXT:    blr
 entry:
   %m = call <4 x float> @llvm.maximum.v4f32(<4 x float> %a, <4 x float> %b)
@@ -384,105 +409,77 @@ entry:
 define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) {
 ; NOVSX-LABEL: v2f64_minimum:
 ; NOVSX:       # %bb.0: # %entry
-; NOVSX-NEXT:    fcmpu 0, 1, 3
-; NOVSX-NEXT:    fmr 6, 1
-; NOVSX-NEXT:    stfd 4, -16(1)
-; NOVSX-NEXT:    stfd 2, -8(1)
-; NOVSX-NEXT:    stfd 3, -32(1)
-; NOVSX-NEXT:    stfd 1, -24(1)
-; NOVSX-NEXT:    bc 12, 0, .LBB6_2
+; NOVSX-NEXT:    fcmpu 0, 3, 3
+; NOVSX-NEXT:    fmr 0, 2
+; NOVSX-NEXT:    fmr 2, 1
+; NOVSX-NEXT:    fmr 1, 3
+; NOVSX-NEXT:    bc 12, 3, .LBB6_2
 ; NOVSX-NEXT:  # %bb.1: # %entry
-; NOVSX-NEXT:    fmr 6, 3
+; NOVSX-NEXT:    fmr 1, 2
 ; NOVSX-NEXT:  .LBB6_2: # %entry
-; NOVSX-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
-; NOVSX-NEXT:    ld 4, -24(1)
-; NOVSX-NEXT:    lfs 0, .LCPI6_0@toc@l(3)
-; NOVSX-NEXT:    fmr 5, 0
+; NOVSX-NEXT:    fcmpu 0, 4, 4
+; NOVSX-NEXT:    fmr 2, 4
+; NOVSX-NEXT:    stfd 1, -8(1)
 ; NOVSX-NEXT:    bc 12, 3, .LBB6_4
 ; NOVSX-NEXT:  # %bb.3: # %entry
-; NOVSX-NEXT:    fmr 5, 6
+; NOVSX-NEXT:    fmr 2, 0
 ; NOVSX-NEXT:  .LBB6_4: # %entry
-; NOVSX-NEXT:    li 3, 1
-; NOVSX-NEXT:    ld 5, -32(1)
-; NOVSX-NEXT:    rldic 3, 3, 63, 0
-; NOVSX-NEXT:    cmpd 4, 3
-; NOVSX-NEXT:    bc 12, 2, .LBB6_6
+; NOVSX-NEXT:    fcmpu 0, 1, 3
+; NOVSX-NEXT:    fmr 5, 1
+; NOVSX-NEXT:    stfd 2, -16(1)
+; NOVSX-NEXT:    cror 20, 0, 3
+; NOVSX-NEXT:    bc 12, 20, .LBB6_6
 ; NOVSX-NEXT:  # %bb.5: # %entry
-; NOVSX-NEXT:    fmr 1, 5
+; NOVSX-NEXT:    fmr 5, 3
 ; NOVSX-NEXT:  .LBB6_6: # %entry
-; NOVSX-NEXT:    cmpd 5, 3
-; NOVSX-NEXT:    bc 12, 2, .LBB6_8
+; NOVSX-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
+; NOVSX-NEXT:    lfs 0, .LCPI6_0@toc@l(3)
+; NOVSX-NEXT:    ld 3, -8(1)
+; NOVSX-NEXT:    fcmpu 0, 5, 0
+; NOVSX-NEXT:    bc 4, 2, .LBB6_8
 ; NOVSX-NEXT:  # %bb.7: # %entry
-; NOVSX-NEXT:    fmr 3, 1
+; NOVSX-NEXT:    cmpdi 3, 0
+; NOVSX-NEXT:    bc 12, 0, .LBB6_9
 ; NOVSX-NEXT:  .LBB6_8: # %entry
-; NOVSX-NEXT:    addis 4, 2, .LCPI6_1@toc@ha
-; NOVSX-NEXT:    lfs 1, .LCPI6_1@toc@l(4)
-; NOVSX-NEXT:    fcmpu 0, 5, 1
-; NOVSX-NEXT:    bc 12, 2, .LBB6_10
-; NOVSX-NEXT:  # %bb.9: # %entry
-; NOVSX-NEXT:    fmr 3, 5
-; NOVSX-NEXT:  .LBB6_10: # %entry
+; NOVSX-NEXT:    fmr 1, 5
+; NOVSX-NEXT:  .LBB6_9: # %entry
 ; NOVSX-NEXT:    fcmpu 0, 2, 4
-; NOVSX-NEXT:    fmr 5, 2
-; NOVSX-NEXT:    bc 12, 0, .LBB6_12
-; NOVSX-NEXT:  # %bb.11: # %entry
-; NOVSX-NEXT:    fmr 5, 4
-; NOVSX-NEXT:  .LBB6_12: # %entry
-; NOVSX-NEXT:    ld 5, -8(1)
-; NOVSX-NEXT:    bc 12, 3, .LBB6_14
-; NOVSX-NEXT:  # %bb.13: # %entry
-; NOVSX-NEXT:    fmr 0, 5
-; NOVSX-NEXT:  .LBB6_14: # %entry
-; NOVSX-NEXT:    cmpd 5, 3
-; NOVSX-NEXT:    ld 4, -16(1)
-; NOVSX-NEXT:    bc 4, 2, .LBB6_19
-; NOVSX-NEXT:  # %bb.15: # %entry
-; NOVSX-NEXT:    cmpd 4, 3
-; NOVSX-NEXT:    bc 4, 2, .LBB6_20
-; NOVSX-NEXT:  .LBB6_16: # %entry
-; NOVSX-NEXT:    fcmpu 0, 0, 1
-; NOVSX-NEXT:    bc 12, 2, .LBB6_18
-; NOVSX-NEXT:  .LBB6_17: # %entry
-; NOVSX-NEXT:    fmr 4, 0
-; NOVSX-NEXT:  .LBB6_18: # %entry
-; NOVSX-NEXT:    fmr 1, 3
-; NOVSX-NEXT:    fmr 2, 4
+; NOVSX-NEXT:    fmr 3, 2
+; NOVSX-NEXT:    cror 20, 0, 3
+; NOVSX-NEXT:    bc 12, 20, .LBB6_11
+; NOVSX-NEXT:  # %bb.10: # %entry
+; NOVSX-NEXT:    fmr 3, 4
+; NOVSX-NEXT:  .LBB6_11: # %entry
+; NOVSX-NEXT:    fcmpu 0, 3, 0
+; NOVSX-NEXT:    ld 3, -16(1)
+; NOVSX-NEXT:    bc 4, 2, .LBB6_13
+; NOVSX-NEXT:  # %bb.12: # %entry
+; NOVSX-NEXT:    cmpdi 3, 0
+; NOVSX-NEXT:    bclr 12, 0, 0
+; NOVSX-NEXT:  .LBB6_13: # %entry
+; NOVSX-NEXT:    fmr 2, 3
 ; NOVSX-NEXT:    blr
-; NOVSX-NEXT:  .LBB6_19: # %entry
-; NOVSX-NEXT:    fmr 2, 0
-; NOVSX-NEXT:    cmpd 4, 3
-; NOVSX-NEXT:    bc 12, 2, .LBB6_16
-; NOVSX-NEXT:  .LBB6_20: # %entry
-; NOVSX-NEXT:    fmr 4, 2
-; NOVSX-NEXT:    fcmpu 0, 0, 1
-; NOVSX-NEXT:    bc 4, 2, .LBB6_17
-; NOVSX-NEXT:    b .LBB6_18
 ;
 ; VSX-LABEL: v2f64_minimum:
 ; VSX:       # %bb.0: # %entry
-; VSX-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
-; VSX-NEXT:    xvmindp 0, 34, 35
-; VSX-NEXT:    xvcmpeqdp 35, 35, 35
-; VSX-NEXT:    addi 3, 3, .LCPI6_0@toc@l
-; VSX-NEXT:    xvcmpeqdp 34, 34, 34
-; VSX-NEXT:    xxlnor 35, 35, 35
+; VSX-NEXT:    xvcmpeqdp 36, 35, 35
+; VSX-NEXT:    xxlnor 36, 36, 36
+; VSX-NEXT:    xxsel 0, 34, 35, 36
+; VSX-NEXT:    xvcmpeqdp 34, 0, 0
 ; VSX-NEXT:    xxlnor 34, 34, 34
-; VSX-NEXT:    lxvd2x 2, 0, 3
-; VSX-NEXT:    xxlor 1, 34, 35
-; VSX-NEXT:    xxsel 34, 0, 2, 1
+; VSX-NEXT:    xxsel 1, 35, 0, 34
+; VSX-NEXT:    xvmindp 34, 0, 1
 ; VSX-NEXT:    blr
 ;
 ; AIX-LABEL: v2f64_minimum:
 ; AIX:       # %bb.0: # %entry
-; AIX-NEXT:    ld 3, L..C6(2) # %const.0
-; AIX-NEXT:    xvmindp 0, 34, 35
-; AIX-NEXT:    xvcmpeqdp 35, 35, 35
-; AIX-NEXT:    lxvd2x 2, 0, 3
-; AIX-NEXT:    xvcmpeqdp 34, 34, 34
-; AIX-NEXT:    xxlnor 35, 35, 35
+; AIX-NEXT:    xvcmpeqdp 36, 35, 35
+; AIX-NEXT:    xxlnor 36, 36, 36
+; AIX-NEXT:    xxsel 0, 34, 35, 36
+; AIX-NEXT:    xvcmpeqdp 34, 0, 0
 ; AIX-NEXT:    xxlnor 34, 34, 34
-; AIX-NEXT:    xxlor 1, 34, 35
-; AIX-NEXT:    xxsel 34, 0, 2, 1
+; AIX-NEXT:    xxsel 1, 35, 0, 34
+; AIX-NEXT:    xvmindp 34, 0, 1
 ; AIX-NEXT:    blr
 entry:
   %m = call <2 x double> @llvm.minimum.v2f64(<2 x double> %a, <2 x double> %b)
@@ -492,103 +489,77 @@ entry:
 define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) {
 ; NOVSX-LABEL: v2f64_maximum:
 ; NOVSX:       # %bb.0: # %entry
-; NOVSX-NEXT:    fcmpu 0, 1, 3
-; NOVSX-NEXT:    fmr 6, 1
-; NOVSX-NEXT:    stfd 4, -16(1)
-; NOVSX-NEXT:    stfd 2, -8(1)
-; NOVSX-NEXT:    stfd 3, -32(1)
-; NOVSX-NEXT:    stfd 1, -24(1)
-; NOVSX-NEXT:    bc 12, 1, .LBB7_2
+; NOVSX-NEXT:    fcmpu 0, 3, 3
+; NOVSX-NEXT:    fmr 0, 2
+; NOVSX-NEXT:    fmr 2, 1
+; NOVSX-NEXT:    fmr 1, 3
+; NOVSX-NEXT:    bc 12, 3, .LBB7_2
 ; NOVSX-NEXT:  # %bb.1: # %entry
-; NOVSX-NEXT:    fmr 6, 3
+; NOVSX-NEXT:    fmr 1, 2
 ; NOVSX-NEXT:  .LBB7_2: # %entry
-; NOVSX-NEXT:    addis 4, 2, .LCPI7_0@toc@ha
-; NOVSX-NEXT:    ld 3, -24(1)
-; NOVSX-NEXT:    lfs 0, .LCPI7_0@toc@l(4)
-; NOVSX-NEXT:    fmr 5, 0
+; NOVSX-NEXT:    fcmpu 0, 4, 4
+; NOVSX-NEXT:    fmr 2, 4
+; NOVSX-NEXT:    stfd 1, -8(1)
 ; NOVSX-NEXT:    bc 12, 3, .LBB7_4
 ; NOVSX-NEXT:  # %bb.3: # %entry
-; NOVSX-NEXT:    fmr 5, 6
+; NOVSX-NEXT:    fmr 2, 0
 ; NOVSX-NEXT:  .LBB7_4: # %entry
-; NOVSX-NEXT:    cmpdi 3, 0
-; NOVSX-NEXT:    ld 4, -32(1)
-; NOVSX-NEXT:    bc 12, 2, .LBB7_6
+; NOVSX-NEXT:    fcmpu 0, 1, 3
+; NOVSX-NEXT:    fmr 5, 1
+; NOVSX-NEXT:    stfd 2, -16(1)
+; NOVSX-NEXT:    cror 20, 1, 3
+; NOVSX-NEXT:    bc 12, 20, .LBB7_6
 ; NOVSX-NEXT:  # %bb.5: # %entry
-; NOVSX-NEXT:    fmr 1, 5
+; NOVSX-NEXT:    fmr 5, 3
 ; NOVSX-NEXT:  .LBB7_6: # %entry
-; NOVSX-NEXT:    cmpdi 4, 0
-; NOVSX-NEXT:    bc 12, 2, .LBB7_8
+; NOVSX-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
+; NOVSX-NEXT:    lfs 0, .LCPI7_0@toc@l(3)
+; NOVSX-NEXT:    ld 3, -8(1)
+; NOVSX-NEXT:    fcmpu 0, 5, 0
+; NOVSX-NEXT:    bc 4, 2, .LBB7_8
 ; NOVSX-NEXT:  # %bb.7: # %entry
-; NOVSX-NEXT:    fmr 3, 1
+; NOVSX-NEXT:    cmpdi 3, -1
+; NOVSX-NEXT:    bc 12, 1, .LBB7_9
 ; NOVSX-NEXT:  .LBB7_8: # %entry
-; NOVSX-NEXT:    addis 3, 2, .LCPI7_1@toc@ha
-; NOVSX-NEXT:    lfs 1, .LCPI7_1@toc@l(3)
-; NOVSX-NEXT:    fcmpu 0, 5, 1
-; NOVSX-NEXT:    bc 12, 2, .LBB7_10
-; NOVSX-NEXT:  # %bb.9: # %entry
-; NOVSX-NEXT:    fmr 3, 5
-; NOVSX-NEXT:  .LBB7_10: # %entry
+; NOVSX-NEXT:    fmr 1, 5
+; NOVSX-NEXT:  .LBB7_9: # %entry
 ; NOVSX-NEXT:    fcmpu 0, 2, 4
-; NOVSX-NEXT:    fmr 5, 2
-; NOVSX-NEXT:    bc 12, 1, .LBB7_12
-; NOVSX-NEXT:  # %bb.11: # %entry
-; NOVSX-NEXT:    fmr 5, 4
-; NOVSX-NEXT:  .LBB7_12: # %entry
-; NOVSX-NEXT:    ld 4, -8(1)
-; NOVSX-NEXT:    bc 12, 3, .LBB7_14
-; NOVSX-NEXT:  # %bb.13: # %entry
-; NOVSX-NEXT:    fmr 0, 5
-; NOVSX-NEXT:  .LBB7_14: # %entry
-; NOVSX-NEXT:    cmpdi 4, 0
+; NOVSX-NEXT:    fmr 3, 2
+; NOVSX-NEXT:    cror 20, 1, 3
+; NOVSX-NEXT:    bc 12, 20, .LBB7_11
+; NOVSX-NEXT:  # %bb.10: # %entry
+; NOVSX-NEXT:    fmr 3, 4
+; NOVSX-NEXT:  .LBB7_11: # %entry
+; NOVSX-NEXT:    fcmpu 0, 3, 0
 ; NOVSX-NEXT:    ld 3, -16(1)
-; NOVSX-NEXT:    bc 4, 2, .LBB7_19
-; NOVSX-NEXT:  # %bb.15: # %entry
-; NOVSX-NEXT:    cmpdi 3, 0
-; NOVSX-NEXT:    bc 4, 2, .LBB7_20
-; NOVSX-NEXT:  .LBB7_16: # %entry
-; NOVSX-NEXT:    fcmpu 0, 0, 1
-; NOVSX-NEXT:    bc 12, 2, .LBB7_18
-; NOVSX-NEXT:  .LBB7_17: # %entry
-; NOVSX-NEXT:    fmr 4, 0
-; NOVSX-NEXT:  .LBB7_18: # %entry
-; NOVSX-NEXT:    fmr 1, 3
-; NOVSX-NEXT:    fmr 2, 4
+; NOVSX-NEXT:    bc 4, 2, .LBB7_13
+; NOVSX-NEXT:  # %bb.12: # %entry
+; NOVSX-NEXT:    cmpdi 3, -1
+; NOVSX-NEXT:    bclr 12, 1, 0
+; NOVSX-NEXT:  .LBB7_13: # %entry
+; NOVSX-NEXT:    fmr 2, 3
 ; NOVSX-NEXT:    blr
-; NOVSX-NEXT:  .LBB7_19: # %entry
-; NOVSX-NEXT:    fmr 2, 0
-; NOVSX-NEXT:    cmpdi 3, 0
-; NOVSX-NEXT:    bc 12, 2, .LBB7_16
-; NOVSX-NEXT:  .LBB7_20: # %entry
-; NOVSX-NEXT:    fmr 4, 2
-; NOVSX-NEXT:    fcmpu 0, 0, 1
-; NOVSX-NEXT:    bc 4, 2, .LBB7_17
-; NOVSX-NEXT:    b .LBB7_18
 ;
 ; VSX-LABEL: v2f64_maximum:
 ; VSX:       # %bb.0: # %entry
-; VSX-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
-; VSX-NEXT:    xvmaxdp 0, 34, 35
-; VSX-NEXT:    xvcmpeqdp 35, 35, 35
-; VSX-NEXT:    addi 3, 3, .LCPI7_0@toc@l
-; VSX-NEXT:    xvcmpeqdp 34, 34, 34
-; VSX-NEXT:    xxlnor 35, 35, 35
+; VSX-NEXT:    xvcmpeqdp 36, 35, 35
+; VSX-NEXT:    xxlnor 36, 36, 36
+; VSX-NEXT:    xxsel 0, 34, 35, 36
+; VSX-NEXT:    xvcmpeqdp 34, 0, 0
 ; VSX-NEXT:    xxlnor 34, 34, 34
-; VSX-NEXT:    lxvd2x 2, 0, 3
-; VSX-NEXT:    xxlor 1, 34, 35
-; VSX-NEXT:    xxsel 34, 0, 2, 1
+; VSX-NEXT:    xxsel 1, 35, 0, 34
+; VSX-NEXT:    xvmaxdp 34, 0, 1
 ; VSX-NEXT:    blr
 ;
 ; AIX-LABEL: v2f64_maximum:
 ; AIX:       # %bb.0: # %entry
-; AIX-NEXT:    ld 3, L..C7(2) # %const.0
-; AIX-NEXT:    xvmaxdp 0, 34, 35
-; AIX-NEXT:    xvcmpeqdp 35, 35, 35
-; AIX-NEXT:    lxvd2x 2, 0, 3
-; AIX-NEXT:    xvcmpeqdp 34, 34, 34
-; AIX-NEXT:    xxlnor 35, 35, 35
+; AIX-NEXT:    xvcmpeqdp 36, 35, 35
+; AIX-NEXT:    xxlnor 36, 36, 36
+; AIX-NEXT:    xxsel 0, 34, 35, 36
+; AIX-NEXT:    xvcmpeqdp 34, 0, 0
 ; AIX-NEXT:    xxlnor 34, 34, 34
-; AIX-NEXT:    xxlor 1, 34, 35
-; AIX-NEXT:    xxsel 34, 0, 2, 1
+; AIX-NEXT:    xxsel 1, 35, 0, 34
+; AIX-NEXT:    xvmaxdp 34, 0, 1
 ; AIX-NEXT:    blr
 entry:
   %m = call <2 x double> @llvm.maximum.v2f64(<2 x double> %a, <2 x double> %b)
diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
index 06515e4f82687..21eee72245b45 100644
--- a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
@@ -1819,170 +1819,162 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) nounwind {
 ;
 ; AVX512-LABEL: test_fmaximum_v4f16:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    pushq %rbp
-; AVX512-NEXT:    pushq %r15
-; AVX512-NEXT:    pushq %r14
-; AVX512-NEXT:    pushq %r13
-; AVX512-NEXT:    pushq %r12
-; AVX512-NEXT:    pushq %rbx
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[3,3,3,3]
+; AVX512-NEXT:    vpsrld $16, %xmm1, %xmm2
 ; AVX512-NEXT:    vcvtph2ps %xmm2, %xmm2
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
-; AVX512-NEXT:    vcvtph2ps %xmm3, %xmm3
 ; AVX512-NEXT:    xorl %eax, %eax
-; AVX512-NEXT:    vucomiss %xmm2, %xmm3
+; AVX512-NEXT:    vucomiss %xmm2, %xmm2
 ; AVX512-NEXT:    movl $65535, %ecx # imm = 0xFFFF
 ; AVX512-NEXT:    movl $0, %edx
 ; AVX512-NEXT:    cmovpl %ecx, %edx
-; AVX512-NEXT:    movl $0, %edi
-; AVX512-NEXT:    cmoval %ecx, %edi
-; AVX512-NEXT:    vpsrldq {{.*#+}} xmm2 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX512-NEXT:    vcvtph2ps %xmm2, %xmm2
-; AVX512-NEXT:    vpsrldq {{.*#+}} xmm3 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX512-NEXT:    vcvtph2ps %xmm3, %xmm3
-; AVX512-NEXT:    vucomiss %xmm2, %xmm3
+; AVX512-NEXT:    vcvtph2ps %xmm1, %xmm3
+; AVX512-NEXT:    vucomiss %xmm3, %xmm3
 ; AVX512-NEXT:    movl $0, %esi
 ; AVX512-NEXT:    cmovpl %ecx, %esi
-; AVX512-NEXT:    movl $0, %r9d
-; AVX512-NEXT:    cmoval %ecx, %r9d
-; AVX512-NEXT:    vshufpd {{.*#+}} xmm2 = xmm1[1,0]
-; AVX512-NEXT:    vcvtph2ps %xmm2, %xmm2
-; AVX512-NEXT:    vshufpd {{.*#+}} xmm3 = xmm0[1,0]
-; AVX512-NEXT:    vcvtph2ps %xmm3, %xmm3
-; AVX512-NEXT:    vucomiss %xmm2, %xmm3
-; AVX512-NEXT:    movl $0, %r8d
-; AVX512-NEXT:    cmovpl %ecx, %r8d
-; AVX512-NEXT:    movl $0, %r11d
-; AVX512-NEXT:    cmoval %ecx, %r11d
-; AVX512-NEXT:    vpsrlq $48, %xmm1, %xmm2
-; AVX512-NEXT:    vcvtph2ps %xmm2, %xmm2
-; AVX512-NEXT:    vpsrlq $48, %xmm0, %xmm3
-; AVX512-NEXT:    vcvtph2ps %xmm3, %xmm3
-; AVX512-NEXT:    vucomiss %xmm2, %xmm3
-; AVX512-NEXT:    movl $0, %r10d
-; AVX512-NEXT:    cmovpl %ecx, %r10d
-; AVX512-NEXT:    movl $0, %ebp
-; AVX512-NEXT:    cmoval %ecx, %ebp
-; AVX512-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; AVX512-NEXT:    vcvtph2ps %xmm2, %xmm2
-; AVX512-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; AVX512-NEXT:    vcvtph2ps %xmm3, %xmm3
-; AVX512-NEXT:    vucomiss %xmm2, %xmm3
-; AVX512-NEXT:    movl $0, %ebx
-; AVX512-NEXT:    cmovpl %ecx, %ebx
-; AVX512-NEXT:    movl $0, %r14d
-; AVX512-NEXT:    cmoval %ecx, %r14d
-; AVX512-NEXT:    vpsrld $16, %xmm1, %xmm2
-; AVX512-NEXT:    vcvtph2ps %xmm2, %xmm2
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm3
-; AVX512-NEXT:    vcvtph2ps %xmm3, %xmm3
-; AVX512-NEXT:    vucomiss %xmm2, %xmm3
-; AVX512-NEXT:    movl $0, %r15d
-; AVX512-NEXT:    cmovpl %ecx, %r15d
-; AVX512-NEXT:    movl $0, %r12d
-; AVX512-NEXT:    cmoval %ecx, %r12d
-; AVX512-NEXT:    vcvtph2ps %xmm1, %xmm2
-; AVX512-NEXT:    vcvtph2ps %xmm0, %xmm3
-; AVX512-NEXT:    vucomiss %xmm2, %xmm3
-; AVX512-NEXT:    movl $0, %r13d
-; AVX512-NEXT:    cmoval %ecx, %r13d
-; AVX512-NEXT:    vmovd %r13d, %xmm2
-; AVX512-NEXT:    vpinsrw $1, %r12d, %xmm2, %xmm2
-; AVX512-NEXT:    vpinsrw $2, %r14d, %xmm2, %xmm2
-; AVX512-NEXT:    vpinsrw $3, %ebp, %xmm2, %xmm2
-; AVX512-NEXT:    vpinsrw $4, %r11d, %xmm2, %xmm2
-; AVX512-NEXT:    vpinsrw $5, %r9d, %xmm2, %xmm2
-; AVX512-NEXT:    vpinsrw $6, %edi, %xmm2, %xmm2
-; AVX512-NEXT:    movl $0, %edi
-; AVX512-NEXT:    cmovpl %ecx, %edi
-; AVX512-NEXT:    vpsrldq {{.*#+}} xmm3 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX512-NEXT:    vcvtph2ps %xmm3, %xmm3
-; AVX512-NEXT:    vpsrldq {{.*#+}} xmm4 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT:    vmovd %esi, %xmm4
+; AVX512-NEXT:    vpinsrw $1, %edx, %xmm4, %xmm5
+; AVX512-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
 ; AVX512-NEXT:    vcvtph2ps %xmm4, %xmm4
-; AVX512-NEXT:    vucomiss %xmm3, %xmm4
-; AVX512-NEXT:    movl $0, %r9d
-; AVX512-NEXT:    cmoval %ecx, %r9d
-; AVX512-NEXT:    vpinsrw $7, %r9d, %xmm2, %xmm2
-; AVX512-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm2
-; AVX512-NEXT:    vmovd %edi, %xmm3
-; AVX512-NEXT:    vpinsrw $1, %r15d, %xmm3, %xmm3
-; AVX512-NEXT:    vpinsrw $2, %ebx, %xmm3, %xmm3
-; AVX512-NEXT:    vpinsrw $3, %r10d, %xmm3, %xmm3
-; AVX512-NEXT:    vpinsrw $4, %r8d, %xmm3, %xmm3
-; AVX512-NEXT:    vpinsrw $5, %esi, %xmm3, %xmm3
-; AVX512-NEXT:    vpinsrw $6, %edx, %xmm3, %xmm3
+; AVX512-NEXT:    vucomiss %xmm4, %xmm4
+; AVX512-NEXT:    movl $0, %edx
+; AVX512-NEXT:    cmovpl %ecx, %edx
+; AVX512-NEXT:    vpinsrw $2, %edx, %xmm5, %xmm5
+; AVX512-NEXT:    vpsrlq $48, %xmm1, %xmm6
+; AVX512-NEXT:    vcvtph2ps %xmm6, %xmm6
+; AVX512-NEXT:    vucomiss %xmm6, %xmm6
 ; AVX512-NEXT:    movl $0, %edx
 ; AVX512-NEXT:    cmovpl %ecx, %edx
-; AVX512-NEXT:    vpinsrw $7, %edx, %xmm3, %xmm3
-; AVX512-NEXT:    vpbroadcastw {{.*#+}} xmm4 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
-; AVX512-NEXT:    vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
-; AVX512-NEXT:    vpsrld $16, %xmm2, %xmm3
+; AVX512-NEXT:    vpinsrw $3, %edx, %xmm5, %xmm5
+; AVX512-NEXT:    vshufpd {{.*#+}} xmm7 = xmm1[1,0]
+; AVX512-NEXT:    vcvtph2ps %xmm7, %xmm8
+; AVX512-NEXT:    vucomiss %xmm8, %xmm8
+; AVX512-NEXT:    movl $0, %edx
+; AVX512-NEXT:    cmovpl %ecx, %edx
+; AVX512-NEXT:    vpinsrw $4, %edx, %xmm5, %xmm5
+; AVX512-NEXT:    vpsrldq {{.*#+}} xmm7 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT:    vcvtph2ps %xmm7, %xmm7
+; AVX512-NEXT:    vucomiss %xmm7, %xmm7
+; AVX512-NEXT:    movl $0, %edx
+; AVX512-NEXT:    cmovpl %ecx, %edx
+; AVX512-NEXT:    vpinsrw $5, %edx, %xmm5, %xmm5
+; AVX512-NEXT:    vshufps {{.*#+}} xmm9 = xmm1[3,3,3,3]
+; AVX512-NEXT:    vcvtph2ps %xmm9, %xmm9
+; AVX512-NEXT:    vucomiss %xmm9, %xmm9
+; AVX512-NEXT:    movl $0, %edx
+; AVX512-NEXT:    cmovpl %ecx, %edx
+; AVX512-NEXT:    vpinsrw $6, %edx, %xmm5, %xmm5
+; AVX512-NEXT:    vpsrldq {{.*#+}} xmm10 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT:    vcvtph2ps %xmm10, %xmm10
+; AVX512-NEXT:    vucomiss %xmm10, %xmm10
+; AVX512-NEXT:    movl $0, %edx
+; AVX512-NEXT:    cmovpl %ecx, %edx
+; AVX512-NEXT:    vpinsrw $7, %edx, %xmm5, %xmm5
+; AVX512-NEXT:    vpblendvb %xmm5, %xmm1, %xmm0, %xmm5
+; AVX512-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm5[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT:    vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT:    vucomiss %xmm0, %xmm10
+; AVX512-NEXT:    setb %dl
+; AVX512-NEXT:    kmovw %edx, %k1
+; AVX512-NEXT:    vmovss %xmm0, %xmm10, %xmm10 {%k1}
+; AVX512-NEXT:    vcvtps2ph $4, %xmm10, %xmm0
+; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm5[3,3,3,3]
+; AVX512-NEXT:    vcvtph2ps %xmm1, %xmm1
+; AVX512-NEXT:    vucomiss %xmm1, %xmm9
+; AVX512-NEXT:    setb %dl
+; AVX512-NEXT:    kmovw %edx, %k1
+; AVX512-NEXT:    vmovss %xmm1, %xmm9, %xmm9 {%k1}
+; AVX512-NEXT:    vcvtps2ph $4, %xmm9, %xmm1
+; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm9 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; AVX512-NEXT:    vpsrldq {{.*#+}} xmm10 = xmm5[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT:    vcvtph2ps %xmm10, %xmm10
+; AVX512-NEXT:    vucomiss %xmm10, %xmm7
+; AVX512-NEXT:    setb %dl
+; AVX512-NEXT:    kmovw %edx, %k1
+; AVX512-NEXT:    vmovss %xmm10, %xmm7, %xmm7 {%k1}
+; AVX512-NEXT:    vcvtps2ph $4, %xmm7, %xmm7
+; AVX512-NEXT:    vpshufd {{.*#+}} xmm10 = xmm5[2,3,0,1]
+; AVX512-NEXT:    vcvtph2ps %xmm10, %xmm10
+; AVX512-NEXT:    vucomiss %xmm10, %xmm8
+; AVX512-NEXT:    setb %dl
+; AVX512-NEXT:    kmovw %edx, %k1
+; AVX512-NEXT:    vmovss %xmm10, %xmm8, %xmm8 {%k1}
+; AVX512-NEXT:    vcvtps2ph $4, %xmm8, %xmm8
+; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm10 = xmm8[0],xmm7[0],xmm8[1],xmm7[1],xmm8[2],xmm7[2],xmm8[3],xmm7[3]
+; AVX512-NEXT:    vpunpckldq {{.*#+}} xmm9 = xmm10[0],xmm9[0],xmm10[1],xmm9[1]
+; AVX512-NEXT:    vpsrlq $48, %xmm5, %xmm10
+; AVX512-NEXT:    vcvtph2ps %xmm10, %xmm10
+; AVX512-NEXT:    vucomiss %xmm10, %xmm6
+; AVX512-NEXT:    setb %dl
+; AVX512-NEXT:    kmovw %edx, %k1
+; AVX512-NEXT:    vmovss %xmm10, %xmm6, %xmm6 {%k1}
+; AVX512-NEXT:    vcvtps2ph $4, %xmm6, %xmm6
+; AVX512-NEXT:    vpshufd {{.*#+}} xmm10 = xmm5[1,1,3,3]
+; AVX512-NEXT:    vcvtph2ps %xmm10, %xmm10
+; AVX512-NEXT:    vucomiss %xmm10, %xmm4
+; AVX512-NEXT:    setb %dl
+; AVX512-NEXT:    kmovw %edx, %k1
+; AVX512-NEXT:    vmovss %xmm10, %xmm4, %xmm4 {%k1}
+; AVX512-NEXT:    vcvtps2ph $4, %xmm4, %xmm4
+; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm10 = xmm4[0],xmm6[0],xmm4[1],xmm6[1],xmm4[2],xmm6[2],xmm4[3],xmm6[3]
+; AVX512-NEXT:    vcvtph2ps %xmm5, %xmm11
+; AVX512-NEXT:    vucomiss %xmm11, %xmm3
+; AVX512-NEXT:    setb %dl
+; AVX512-NEXT:    kmovw %edx, %k1
+; AVX512-NEXT:    vmovss %xmm11, %xmm3, %xmm3 {%k1}
+; AVX512-NEXT:    vcvtps2ph $4, %xmm3, %xmm11
+; AVX512-NEXT:    vpsrld $16, %xmm5, %xmm3
 ; AVX512-NEXT:    vcvtph2ps %xmm3, %xmm3
-; AVX512-NEXT:    vpxor %xmm4, %xmm4, %xmm4
-; AVX512-NEXT:    vucomiss %xmm4, %xmm3
-; AVX512-NEXT:    movl $65535, %edx # imm = 0xFFFF
-; AVX512-NEXT:    cmovnel %eax, %edx
-; AVX512-NEXT:    cmovpl %eax, %edx
-; AVX512-NEXT:    vcvtph2ps %xmm2, %xmm3
-; AVX512-NEXT:    vucomiss %xmm4, %xmm3
-; AVX512-NEXT:    movl $65535, %esi # imm = 0xFFFF
-; AVX512-NEXT:    cmovnel %eax, %esi
-; AVX512-NEXT:    cmovpl %eax, %esi
-; AVX512-NEXT:    vmovd %esi, %xmm3
-; AVX512-NEXT:    vpinsrw $1, %edx, %xmm3, %xmm3
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm5 = xmm2[1,1,3,3]
-; AVX512-NEXT:    vcvtph2ps %xmm5, %xmm5
-; AVX512-NEXT:    vucomiss %xmm4, %xmm5
-; AVX512-NEXT:    movl $65535, %edx # imm = 0xFFFF
-; AVX512-NEXT:    cmovnel %eax, %edx
-; AVX512-NEXT:    cmovpl %eax, %edx
-; AVX512-NEXT:    vpinsrw $2, %edx, %xmm3, %xmm3
-; AVX512-NEXT:    vpsrlq $48, %xmm2, %xmm5
-; AVX512-NEXT:    vcvtph2ps %xmm5, %xmm5
-; AVX512-NEXT:    vucomiss %xmm4, %xmm5
-; AVX512-NEXT:    movl $65535, %edx # imm = 0xFFFF
-; AVX512-NEXT:    cmovnel %eax, %edx
-; AVX512-NEXT:    cmovpl %eax, %edx
-; AVX512-NEXT:    vpinsrw $3, %edx, %xmm3, %xmm3
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm5 = xmm2[2,3,0,1]
-; AVX512-NEXT:    vcvtph2ps %xmm5, %xmm5
-; AVX512-NEXT:    vucomiss %xmm4, %xmm5
-; AVX512-NEXT:    movl $65535, %edx # imm = 0xFFFF
-; AVX512-NEXT:    cmovnel %eax, %edx
-; AVX512-NEXT:    cmovpl %eax, %edx
-; AVX512-NEXT:    vpinsrw $4, %edx, %xmm3, %xmm3
-; AVX512-NEXT:    vpsrldq {{.*#+}} xmm5 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX512-NEXT:    vcvtph2ps %xmm5, %xmm5
-; AVX512-NEXT:    vucomiss %xmm4, %xmm5
-; AVX512-NEXT:    movl $65535, %edx # imm = 0xFFFF
-; AVX512-NEXT:    cmovnel %eax, %edx
-; AVX512-NEXT:    cmovpl %eax, %edx
-; AVX512-NEXT:    vpinsrw $5, %edx, %xmm3, %xmm3
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm5 = xmm2[3,3,3,3]
-; AVX512-NEXT:    vcvtph2ps %xmm5, %xmm5
-; AVX512-NEXT:    vucomiss %xmm4, %xmm5
-; AVX512-NEXT:    movl $65535, %edx # imm = 0xFFFF
-; AVX512-NEXT:    cmovnel %eax, %edx
-; AVX512-NEXT:    cmovpl %eax, %edx
-; AVX512-NEXT:    vpinsrw $6, %edx, %xmm3, %xmm3
-; AVX512-NEXT:    vpsrldq {{.*#+}} xmm5 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX512-NEXT:    vcvtph2ps %xmm5, %xmm5
-; AVX512-NEXT:    vucomiss %xmm4, %xmm5
-; AVX512-NEXT:    cmovnel %eax, %ecx
-; AVX512-NEXT:    cmovpl %eax, %ecx
-; AVX512-NEXT:    vpinsrw $7, %ecx, %xmm3, %xmm3
-; AVX512-NEXT:    vpxor %xmm4, %xmm4, %xmm4
-; AVX512-NEXT:    vpcmpeqw %xmm4, %xmm0, %xmm5
-; AVX512-NEXT:    vpblendvb %xmm5, %xmm0, %xmm2, %xmm0
-; AVX512-NEXT:    vpcmpeqw %xmm4, %xmm1, %xmm4
-; AVX512-NEXT:    vpblendvb %xmm4, %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpblendvb %xmm3, %xmm0, %xmm2, %xmm0
-; AVX512-NEXT:    popq %rbx
-; AVX512-NEXT:    popq %r12
-; AVX512-NEXT:    popq %r13
-; AVX512-NEXT:    popq %r14
-; AVX512-NEXT:    popq %r15
-; AVX512-NEXT:    popq %rbp
+; AVX512-NEXT:    vucomiss %xmm3, %xmm2
+; AVX512-NEXT:    setb %dl
+; AVX512-NEXT:    kmovw %edx, %k1
+; AVX512-NEXT:    vmovss %xmm3, %xmm2, %xmm2 {%k1}
+; AVX512-NEXT:    vcvtps2ph $4, %xmm2, %xmm12
+; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm11[0],xmm12[0],xmm11[1],xmm12[1],xmm11[2],xmm12[2],xmm11[3],xmm12[3]
+; AVX512-NEXT:    vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm10[0],xmm2[1],xmm10[1]
+; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm9[0]
+; AVX512-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; AVX512-NEXT:    vpcmpgtw %xmm5, %xmm3, %xmm3
+; AVX512-NEXT:    vpblendvb %xmm3, %xmm2, %xmm5, %xmm3
+; AVX512-NEXT:    vcvtph2ps %xmm12, %xmm5
+; AVX512-NEXT:    vpxor %xmm9, %xmm9, %xmm9
+; AVX512-NEXT:    vucomiss %xmm9, %xmm5
+; AVX512-NEXT:    movl $0, %edx
+; AVX512-NEXT:    cmovel %ecx, %edx
+; AVX512-NEXT:    vcvtph2ps %xmm11, %xmm5
+; AVX512-NEXT:    vucomiss %xmm9, %xmm5
+; AVX512-NEXT:    movl $0, %esi
+; AVX512-NEXT:    cmovel %ecx, %esi
+; AVX512-NEXT:    vmovd %esi, %xmm5
+; AVX512-NEXT:    vpinsrw $1, %edx, %xmm5, %xmm5
+; AVX512-NEXT:    vcvtph2ps %xmm4, %xmm4
+; AVX512-NEXT:    vucomiss %xmm9, %xmm4
+; AVX512-NEXT:    movl $0, %edx
+; AVX512-NEXT:    cmovel %ecx, %edx
+; AVX512-NEXT:    vpinsrw $2, %edx, %xmm5, %xmm4
+; AVX512-NEXT:    vcvtph2ps %xmm6, %xmm5
+; AVX512-NEXT:    vucomiss %xmm9, %xmm5
+; AVX512-NEXT:    movl $0, %edx
+; AVX512-NEXT:    cmovel %ecx, %edx
+; AVX512-NEXT:    vpinsrw $3, %edx, %xmm4, %xmm4
+; AVX512-NEXT:    vcvtph2ps %xmm8, %xmm5
+; AVX512-NEXT:    vucomiss %xmm9, %xmm5
+; AVX512-NEXT:    movl $0, %edx
+; AVX512-NEXT:    cmovel %ecx, %edx
+; AVX512-NEXT:    vpinsrw $4, %edx, %xmm4, %xmm4
+; AVX512-NEXT:    vcvtph2ps %xmm7, %xmm5
+; AVX512-NEXT:    vucomiss %xmm9, %xmm5
+; AVX512-NEXT:    movl $0, %edx
+; AVX512-NEXT:    cmovel %ecx, %edx
+; AVX512-NEXT:    vpinsrw $5, %edx, %xmm4, %xmm4
+; AVX512-NEXT:    vcvtph2ps %xmm1, %xmm1
+; AVX512-NEXT:    vucomiss %xmm9, %xmm1
+; AVX512-NEXT:    movl $0, %edx
+; AVX512-NEXT:    cmovel %ecx, %edx
+; AVX512-NEXT:    vpinsrw $6, %edx, %xmm4, %xmm1
+; AVX512-NEXT:    vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT:    vucomiss %xmm9, %xmm0
+; AVX512-NEXT:    cmovel %ecx, %eax
+; AVX512-NEXT:    vpinsrw $7, %eax, %xmm1, %xmm0
+; AVX512-NEXT:    vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
 ; AVX512-NEXT:    retq
 ;
 ; AVX10_2-LABEL: test_fmaximum_v4f16:

>From b62e0d67a1eb40791eefce0ae1084098dc4095ed Mon Sep 17 00:00:00 2001
From: YunQiang Su <yunqiang at isrc.iscas.ac.cn>
Date: Tue, 9 Dec 2025 14:58:08 +0800
Subject: [PATCH 6/6] Update amdgcn testcase

---
 .../AMDGPU/fcanonicalize-elimination.ll       | 1418 +----------------
 llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll |    7 -
 2 files changed, 9 insertions(+), 1416 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
index 72644a0a40df2..dea63c5d41b37 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx801 -denormal-fp-math-f32=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-FLUSH,GCN-FLUSH %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx801 -denormal-fp-math-f32=ieee < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-DENORM,GCN-DENORM %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-DENORM,GCN-DENORM %s
@@ -8,30 +7,6 @@
 ; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
 ; GFX9: v_max_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 define amdgpu_kernel void @test_no_fold_canonicalize_loaded_value_f32(ptr addrspace(1) %arg) {
-; VI-LABEL: test_no_fold_canonicalize_loaded_value_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v2, v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_mul_f32_e32 v2, 1.0, v2
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_no_fold_canonicalize_loaded_value_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %v = load float, ptr addrspace(1) %gep, align 4
@@ -45,30 +20,6 @@ define amdgpu_kernel void @test_no_fold_canonicalize_loaded_value_f32(ptr addrsp
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 ; GCN-NOT: 1.0
 define amdgpu_kernel void @test_fold_canonicalize_fmul_value_f32(ptr addrspace(1) %arg) {
-; VI-LABEL: test_fold_canonicalize_fmul_value_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v2, v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_mul_f32_e32 v2, 0x41700000, v2
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_fmul_value_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_mul_f32_e32 v1, 0x41700000, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -84,30 +35,6 @@ define amdgpu_kernel void @test_fold_canonicalize_fmul_value_f32(ptr addrspace(1
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fmul_legacy_value_f32(ptr addrspace(1) %arg) {
-; VI-LABEL: test_fold_canonicalize_fmul_legacy_value_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v2, v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_mul_legacy_f32_e32 v2, 0x41700000, v2
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_fmul_legacy_value_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_mul_legacy_f32_e32 v1, 0x41700000, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -123,30 +50,6 @@ define amdgpu_kernel void @test_fold_canonicalize_fmul_legacy_value_f32(ptr addr
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_sub_value_f32(ptr addrspace(1) %arg) {
-; VI-LABEL: test_fold_canonicalize_sub_value_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v2, v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_sub_f32_e32 v2, 0x41700000, v2
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_sub_value_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_sub_f32_e32 v1, 0x41700000, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -162,30 +65,6 @@ define amdgpu_kernel void @test_fold_canonicalize_sub_value_f32(ptr addrspace(1)
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_add_value_f32(ptr addrspace(1) %arg) {
-; VI-LABEL: test_fold_canonicalize_add_value_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v2, v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_add_f32_e32 v2, 0x41700000, v2
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_add_value_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_add_f32_e32 v1, 0x41700000, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -201,30 +80,6 @@ define amdgpu_kernel void @test_fold_canonicalize_add_value_f32(ptr addrspace(1)
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_sqrt_value_f32(ptr addrspace(1) %arg) {
-; VI-LABEL: test_fold_canonicalize_sqrt_value_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v2, v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_sqrt_f32_e32 v2, v2
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_sqrt_value_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_sqrt_f32_e32 v1, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -240,30 +95,6 @@ define amdgpu_kernel void @test_fold_canonicalize_sqrt_value_f32(ptr addrspace(1
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fceil_value_f32(ptr addrspace(1) %arg) {
-; VI-LABEL: test_fold_canonicalize_fceil_value_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v2, v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_ceil_f32_e32 v2, v2
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_fceil_value_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_ceil_f32_e32 v1, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -279,30 +110,6 @@ define amdgpu_kernel void @test_fold_canonicalize_fceil_value_f32(ptr addrspace(
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_floor_value_f32(ptr addrspace(1) %arg) {
-; VI-LABEL: test_fold_canonicalize_floor_value_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v2, v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_floor_f32_e32 v2, v2
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_floor_value_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_floor_f32_e32 v1, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -319,32 +126,6 @@ define amdgpu_kernel void @test_fold_canonicalize_floor_value_f32(ptr addrspace(
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fma_value_f32(ptr addrspace(1) %arg) {
-; VI-LABEL: test_fold_canonicalize_fma_value_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v2, v[0:1]
-; VI-NEXT:    s_mov_b32 s0, 0x41700000
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_fma_f32 v2, v2, s0, s0
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_fma_value_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_mov_b32 s2, 0x41700000
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_fma_f32 v1, v1, s2, s2
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -360,32 +141,6 @@ define amdgpu_kernel void @test_fold_canonicalize_fma_value_f32(ptr addrspace(1)
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fmad_ftz_value_f32(ptr addrspace(1) %arg) {
-; VI-LABEL: test_fold_canonicalize_fmad_ftz_value_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    v_mov_b32_e32 v3, 0x41700000
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v2, v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_mac_f32_e32 v3, 0x41700000, v2
-; VI-NEXT:    flat_store_dword v[0:1], v3
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_fmad_ftz_value_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0x41700000
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_mac_f32_e32 v2, 0x41700000, v1
-; GFX9-NEXT:    global_store_dword v0, v2, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -404,59 +159,6 @@ define amdgpu_kernel void @test_fold_canonicalize_fmad_ftz_value_f32(ptr addrspa
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 ; GCN-NOT: 1.0
 define amdgpu_kernel void @test_fold_canonicalize_fmuladd_value_f32(ptr addrspace(1) %arg) {
-; VI-FLUSH-LABEL: test_fold_canonicalize_fmuladd_value_f32:
-; VI-FLUSH:       ; %bb.0:
-; VI-FLUSH-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-FLUSH-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-FLUSH-NEXT:    v_mov_b32_e32 v3, 0x41700000
-; VI-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-FLUSH-NEXT:    v_mov_b32_e32 v1, s1
-; VI-FLUSH-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-FLUSH-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-FLUSH-NEXT:    flat_load_dword v2, v[0:1]
-; VI-FLUSH-NEXT:    s_waitcnt vmcnt(0)
-; VI-FLUSH-NEXT:    v_mac_f32_e32 v3, 0x41700000, v2
-; VI-FLUSH-NEXT:    flat_store_dword v[0:1], v3
-; VI-FLUSH-NEXT:    s_endpgm
-;
-; VI-DENORM-LABEL: test_fold_canonicalize_fmuladd_value_f32:
-; VI-DENORM:       ; %bb.0:
-; VI-DENORM-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-DENORM-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-DENORM-NEXT:    v_mov_b32_e32 v1, s1
-; VI-DENORM-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-DENORM-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-DENORM-NEXT:    flat_load_dword v2, v[0:1]
-; VI-DENORM-NEXT:    s_mov_b32 s0, 0x41700000
-; VI-DENORM-NEXT:    s_waitcnt vmcnt(0)
-; VI-DENORM-NEXT:    v_fma_f32 v2, v2, s0, s0
-; VI-DENORM-NEXT:    flat_store_dword v[0:1], v2
-; VI-DENORM-NEXT:    s_endpgm
-;
-; GFX9-DENORM-LABEL: test_fold_canonicalize_fmuladd_value_f32:
-; GFX9-DENORM:       ; %bb.0:
-; GFX9-DENORM-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-DENORM-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-DENORM-NEXT:    s_mov_b32 s2, 0x41700000
-; GFX9-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-DENORM-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-DENORM-NEXT:    v_fma_f32 v1, v1, s2, s2
-; GFX9-DENORM-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-DENORM-NEXT:    s_endpgm
-;
-; GFX9-FLUSH-LABEL: test_fold_canonicalize_fmuladd_value_f32:
-; GFX9-FLUSH:       ; %bb.0:
-; GFX9-FLUSH-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-FLUSH-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-FLUSH-NEXT:    v_mov_b32_e32 v2, 0x41700000
-; GFX9-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-FLUSH-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-FLUSH-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLUSH-NEXT:    v_mac_f32_e32 v2, 0x41700000, v1
-; GFX9-FLUSH-NEXT:    global_store_dword v0, v2, s[0:1]
-; GFX9-FLUSH-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -475,30 +177,6 @@ define amdgpu_kernel void @test_fold_canonicalize_fmuladd_value_f32(ptr addrspac
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_canonicalize_value_f32(ptr addrspace(1) %arg) {
-; VI-LABEL: test_fold_canonicalize_canonicalize_value_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v2, v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_mul_f32_e32 v2, 1.0, v2
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_canonicalize_value_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -514,35 +192,6 @@ define amdgpu_kernel void @test_fold_canonicalize_canonicalize_value_f32(ptr add
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f64_f32(ptr addrspace(1) %arg, ptr addrspace(1) %out) {
-; VI-LABEL: test_fold_canonicalize_fpextend_value_f64_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v2, s1
-; VI-NEXT:    v_add_u32_e32 v1, vcc, s0, v1
-; VI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
-; VI-NEXT:    flat_load_dword v1, v[1:2]
-; VI-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    v_add_u32_e32 v2, vcc, s2, v2
-; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_cvt_f64_f32_e32 v[0:1], v1
-; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_fpextend_value_f64_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v1, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_cvt_f64_f32_e32 v[1:2], v1
-; GFX9-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -559,35 +208,6 @@ define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f64_f32(ptr add
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16(ptr addrspace(1) %arg, ptr addrspace(1) %out) {
-; VI-LABEL: test_fold_canonicalize_fpextend_value_f32_f16:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v2, s1
-; VI-NEXT:    v_add_u32_e32 v1, vcc, s0, v1
-; VI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
-; VI-NEXT:    flat_load_ushort v1, v[1:2]
-; VI-NEXT:    v_mov_b32_e32 v2, s3
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_cvt_f32_f16_e32 v3, v1
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v2, vcc
-; VI-NEXT:    flat_store_dword v[0:1], v3
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_fpextend_value_f32_f16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_ushort v1, v1, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[2:3]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds half, ptr addrspace(1) %arg, i32 %id
   %load = load half, ptr addrspace(1) %gep, align 2
@@ -604,35 +224,6 @@ define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16(ptr add
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16_flushf16(ptr addrspace(1) %arg, ptr addrspace(1) %out) #2 {
-; VI-LABEL: test_fold_canonicalize_fpextend_value_f32_f16_flushf16:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v2, s1
-; VI-NEXT:    v_add_u32_e32 v1, vcc, s0, v1
-; VI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
-; VI-NEXT:    flat_load_ushort v1, v[1:2]
-; VI-NEXT:    v_mov_b32_e32 v2, s3
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_cvt_f32_f16_e32 v3, v1
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v2, vcc
-; VI-NEXT:    flat_store_dword v[0:1], v3
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_fpextend_value_f32_f16_flushf16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_ushort v1, v1, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[2:3]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds half, ptr addrspace(1) %arg, i32 %id
   %load = load half, ptr addrspace(1) %gep, align 2
@@ -649,35 +240,6 @@ define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16_flushf1
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f32_f64(ptr addrspace(1) %arg, ptr addrspace(1) %out) {
-; VI-LABEL: test_fold_canonicalize_fpround_value_f32_f64:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v1, 3, v0
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v2, s1
-; VI-NEXT:    v_add_u32_e32 v1, vcc, s0, v1
-; VI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
-; VI-NEXT:    flat_load_dwordx2 v[1:2], v[1:2]
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_cvt_f32_f64_e32 v2, v[1:2]
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v3, vcc
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_fpround_value_f32_f64:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 3, v0
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dwordx2 v[1:2], v1, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_cvt_f32_f64_e32 v1, v[1:2]
-; GFX9-NEXT:    global_store_dword v0, v1, s[2:3]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds double, ptr addrspace(1) %arg, i32 %id
   %load = load double, ptr addrspace(1) %gep, align 8
@@ -694,35 +256,6 @@ define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f32_f64(ptr addr
 ; GCN-NOT: v_mul
 ; GCN: {{flat|global}}_store_short v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32(ptr addrspace(1) %arg, ptr addrspace(1) %out) {
-; VI-LABEL: test_fold_canonicalize_fpround_value_f16_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v2, s1
-; VI-NEXT:    v_add_u32_e32 v1, vcc, s0, v1
-; VI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
-; VI-NEXT:    flat_load_dword v1, v[1:2]
-; VI-NEXT:    v_mov_b32_e32 v2, s3
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_cvt_f16_f32_e32 v3, v1
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v2, vcc
-; VI-NEXT:    flat_store_short v[0:1], v3
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_fpround_value_f16_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v1, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX9-NEXT:    global_store_short v0, v1, s[2:3]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -739,35 +272,6 @@ define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32(ptr addr
 ; GCN-NOT: v_mul
 ; GCN: {{flat|global}}_store_short v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32_flushf16(ptr addrspace(1) %arg, ptr addrspace(1) %out) #2 {
-; VI-LABEL: test_fold_canonicalize_fpround_value_f16_f32_flushf16:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v2, s1
-; VI-NEXT:    v_add_u32_e32 v1, vcc, s0, v1
-; VI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
-; VI-NEXT:    flat_load_dword v1, v[1:2]
-; VI-NEXT:    v_mov_b32_e32 v2, s3
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_cvt_f16_f32_e32 v3, v1
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v2, vcc
-; VI-NEXT:    flat_store_short v[0:1], v3
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_fpround_value_f16_f32_flushf16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v1, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX9-NEXT:    global_store_short v0, v1, s[2:3]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -788,39 +292,6 @@ define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32_flushf16
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fpround_value_v2f16_v2f32(ptr addrspace(1) %arg, ptr addrspace(1) %out) {
-; VI-LABEL: test_fold_canonicalize_fpround_value_v2f16_v2f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v1, 3, v0
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v2, s1
-; VI-NEXT:    v_add_u32_e32 v1, vcc, s0, v1
-; VI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
-; VI-NEXT:    flat_load_dwordx2 v[1:2], v[1:2]
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; VI-NEXT:    v_cvt_f16_f32_sdwa v2, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT:    v_or_b32_e32 v2, v1, v2
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v3, vcc
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_fpround_value_v2f16_v2f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 3, v0
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dwordx2 v[1:2], v1, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX9-NEXT:    v_pack_b32_f16 v1, v1, v2
-; GFX9-NEXT:    global_store_dword v0, v1, s[2:3]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %arg, i32 %id
   %load = load <2 x float>, ptr addrspace(1) %gep, align 8
@@ -835,30 +306,6 @@ define amdgpu_kernel void @test_fold_canonicalize_fpround_value_v2f16_v2f32(ptr
 ; VI:  v_mul_f32_e32 v{{[0-9]+}}, -1.0, v{{[0-9]+}}
 ; GFX9: v_max_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
 define amdgpu_kernel void @test_no_fold_canonicalize_fneg_value_f32(ptr addrspace(1) %arg) {
-; VI-LABEL: test_no_fold_canonicalize_fneg_value_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v2, v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_mul_f32_e32 v2, -1.0, v2
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_no_fold_canonicalize_fneg_value_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_max_f32_e64 v1, -v1, -v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -874,32 +321,6 @@ define amdgpu_kernel void @test_no_fold_canonicalize_fneg_value_f32(ptr addrspac
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fneg_value_f32(ptr addrspace(1) %arg) {
-; VI-LABEL: test_fold_canonicalize_fneg_value_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v2, v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_add_f32_e32 v2, 0, v2
-; VI-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_fneg_value_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_add_f32_e32 v1, 0, v1
-; GFX9-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -914,30 +335,6 @@ define amdgpu_kernel void @test_fold_canonicalize_fneg_value_f32(ptr addrspace(1
 ; VI:  v_mul_f32_e64 v{{[0-9]+}}, 1.0, |v{{[0-9]+}}|
 ; GFX9: v_max_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |v{{[0-9]+}}|
 define amdgpu_kernel void @test_no_fold_canonicalize_fabs_value_f32(ptr addrspace(1) %arg) {
-; VI-LABEL: test_no_fold_canonicalize_fabs_value_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v2, v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_mul_f32_e64 v2, 1.0, |v2|
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_no_fold_canonicalize_fabs_value_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_max_f32_e64 v1, |v1|, |v1|
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -954,30 +351,6 @@ define amdgpu_kernel void @test_no_fold_canonicalize_fabs_value_f32(ptr addrspac
 ; GCN-NOT: v_mul_
 ; GCN-NOT: v_max_
 define amdgpu_kernel void @test_no_fold_canonicalize_fcopysign_value_f32(ptr addrspace(1) %arg, float %sign) {
-; VI-LABEL: test_no_fold_canonicalize_fcopysign_value_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v2, v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_mul_f32_e64 v2, 1.0, |v2|
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_no_fold_canonicalize_fcopysign_value_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_max_f32_e64 v1, |v1|, |v1|
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -995,32 +368,6 @@ define amdgpu_kernel void @test_no_fold_canonicalize_fcopysign_value_f32(ptr add
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fabs_value_f32(ptr addrspace(1) %arg) {
-; VI-LABEL: test_fold_canonicalize_fabs_value_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v2, v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_add_f32_e32 v2, 0, v2
-; VI-NEXT:    v_and_b32_e32 v2, 0x7fffffff, v2
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_fabs_value_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_add_f32_e32 v1, 0, v1
-; GFX9-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -1037,33 +384,6 @@ define amdgpu_kernel void @test_fold_canonicalize_fabs_value_f32(ptr addrspace(1
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_sin_value_f32(ptr addrspace(1) %arg) {
-; VI-LABEL: test_fold_canonicalize_sin_value_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v2, v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_mul_f32_e32 v2, 0.15915494, v2
-; VI-NEXT:    v_fract_f32_e32 v2, v2
-; VI-NEXT:    v_sin_f32_e32 v2, v2
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_sin_value_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_mul_f32_e32 v1, 0.15915494, v1
-; GFX9-NEXT:    v_sin_f32_e32 v1, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -1079,33 +399,6 @@ define amdgpu_kernel void @test_fold_canonicalize_sin_value_f32(ptr addrspace(1)
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_cos_value_f32(ptr addrspace(1) %arg) {
-; VI-LABEL: test_fold_canonicalize_cos_value_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v2, v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_mul_f32_e32 v2, 0.15915494, v2
-; VI-NEXT:    v_fract_f32_e32 v2, v2
-; VI-NEXT:    v_cos_f32_e32 v2, v2
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_cos_value_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_mul_f32_e32 v1, 0.15915494, v1
-; GFX9-NEXT:    v_cos_f32_e32 v1, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -1121,33 +414,6 @@ define amdgpu_kernel void @test_fold_canonicalize_cos_value_f32(ptr addrspace(1)
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_short v{{.+}}, [[V0]]
 define amdgpu_kernel void @test_fold_canonicalize_sin_value_f16(ptr addrspace(1) %arg) {
-; VI-LABEL: test_fold_canonicalize_sin_value_f16:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_ushort v2, v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_mul_f16_e32 v2, 0.15915494, v2
-; VI-NEXT:    v_fract_f16_e32 v2, v2
-; VI-NEXT:    v_sin_f16_e32 v2, v2
-; VI-NEXT:    flat_store_short v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_sin_value_f16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_ushort v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_mul_f16_e32 v1, 0.15915494, v1
-; GFX9-NEXT:    v_sin_f16_e32 v1, v1
-; GFX9-NEXT:    global_store_short v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds half, ptr addrspace(1) %arg, i32 %id
   %load = load half, ptr addrspace(1) %gep, align 2
@@ -1163,33 +429,6 @@ define amdgpu_kernel void @test_fold_canonicalize_sin_value_f16(ptr addrspace(1)
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_short v{{.+}}, [[V0]]
 define amdgpu_kernel void @test_fold_canonicalize_cos_value_f16(ptr addrspace(1) %arg) {
-; VI-LABEL: test_fold_canonicalize_cos_value_f16:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_ushort v2, v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_mul_f16_e32 v2, 0.15915494, v2
-; VI-NEXT:    v_fract_f16_e32 v2, v2
-; VI-NEXT:    v_cos_f16_e32 v2, v2
-; VI-NEXT:    flat_store_short v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_cos_value_f16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_ushort v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_mul_f16_e32 v1, 0.15915494, v1
-; GFX9-NEXT:    v_cos_f16_e32 v1, v1
-; GFX9-NEXT:    global_store_short v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds half, ptr addrspace(1) %arg, i32 %id
   %load = load half, ptr addrspace(1) %gep, align 2
@@ -1205,26 +444,6 @@ define amdgpu_kernel void @test_fold_canonicalize_cos_value_f16(ptr addrspace(1)
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_qNaN_value_f32(ptr addrspace(1) %arg) {
-; VI-LABEL: test_fold_canonicalize_qNaN_value_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_qNaN_value_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %canonicalized = tail call float @llvm.canonicalize.f32(float 0x7FF8000000000000)
@@ -1243,32 +462,6 @@ define amdgpu_kernel void @test_fold_canonicalize_qNaN_value_f32(ptr addrspace(1
 
 ; GFX9: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_minnum_value_from_load_f32_ieee_mode(ptr addrspace(1) %arg) {
-; VI-LABEL: test_fold_canonicalize_minnum_value_from_load_f32_ieee_mode:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v2, v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_mul_f32_e32 v2, 1.0, v2
-; VI-NEXT:    v_min_f32_e32 v2, 0, v2
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_minnum_value_from_load_f32_ieee_mode:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT:    v_min_f32_e32 v1, 0, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -1294,32 +487,6 @@ define amdgpu_kernel void @test_fold_canonicalize_minnum_value_from_load_f32_iee
 ; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_minnum_value_f32(ptr addrspace(1) %arg) {
-; VI-LABEL: test_fold_canonicalize_minnum_value_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v2, v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_add_f32_e32 v2, 0, v2
-; VI-NEXT:    v_min_f32_e32 v2, 0, v2
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_minnum_value_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_add_f32_e32 v1, 0, v1
-; GFX9-NEXT:    v_min_f32_e32 v1, 0, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -1337,32 +504,6 @@ define amdgpu_kernel void @test_fold_canonicalize_minnum_value_f32(ptr addrspace
 ; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[LOAD]]
 ; GFX9: v_max_f32_e32 v{{[0-9]+}}, [[LOAD]], [[LOAD]]
 define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(ptr addrspace(1) %arg) {
-; VI-LABEL: test_fold_canonicalize_sNaN_value_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v2, v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_mul_f32_e32 v2, 1.0, v2
-; VI-NEXT:    v_min_f32_e32 v2, 0x7fc00000, v2
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_sNaN_value_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT:    v_min_f32_e32 v1, 0x7fc00000, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -1396,32 +537,6 @@ define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(ptr addrspace(1
 ; GCN-NOT: v_max
 ; GCN:  {{flat|global}}_store_dword v{{.+}}, [[RESULT]]
 define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_from_load_f32_ieee_mode(ptr addrspace(1) %arg) {
-; VI-LABEL: test_fold_canonicalize_maxnum_value_from_load_f32_ieee_mode:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v2, v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_mul_f32_e32 v2, 1.0, v2
-; VI-NEXT:    v_max_f32_e32 v2, 0, v2
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_maxnum_value_from_load_f32_ieee_mode:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT:    v_max_f32_e32 v1, 0, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -1437,32 +552,6 @@ define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_from_load_f32_iee
 ; GCN-NOT: v_mul
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f32(ptr addrspace(1) %arg) {
-; VI-LABEL: test_fold_canonicalize_maxnum_value_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v2, v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_add_f32_e32 v2, 0, v2
-; VI-NEXT:    v_max_f32_e32 v2, 0, v2
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_maxnum_value_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_add_f32_e32 v1, 0, v1
-; GFX9-NEXT:    v_max_f32_e32 v1, 0, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load = load float, ptr addrspace(1) %gep, align 4
@@ -1479,32 +568,6 @@ define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f32(ptr addrspace
 ; GCN-NOT: v_max
 ; GCN:  {{flat|global}}_store_dwordx2 v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f64(ptr addrspace(1) %arg) {
-; VI-LABEL: test_fold_canonicalize_maxnum_value_f64:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_add_f64 v[2:3], v[2:3], 0
-; VI-NEXT:    v_max_f64 v[2:3], v[2:3], 0
-; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_maxnum_value_f64:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dwordx2 v[0:1], v2, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_add_f64 v[0:1], v[0:1], 0
-; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], 0
-; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds double, ptr addrspace(1) %arg, i32 %id
   %load = load double, ptr addrspace(1) %gep, align 8
@@ -1521,10 +584,6 @@ define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f64(ptr addrspace
 ; GCN-NOT: v_max
 ; GCN-NEXT: ; return
 define amdgpu_ps float @test_fold_canonicalize_fmul_value_f32_no_ieee(float %arg) {
-; GCN-LABEL: test_fold_canonicalize_fmul_value_f32_no_ieee:
-; GCN:       ; %bb.0: ; %entry
-; GCN-NEXT:    v_mul_f32_e32 v0, 0x41700000, v0
-; GCN-NEXT:    ; return to shader part epilog
 entry:
   %v = fmul float %arg, 15.0
   %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
@@ -1537,10 +596,6 @@ entry:
 ; GCN-NOT: v_max
 ; GCN-NEXT: ; return
 define amdgpu_ps float @test_fold_canonicalize_fmul_nnan_value_f32_no_ieee(float %arg) {
-; GCN-LABEL: test_fold_canonicalize_fmul_nnan_value_f32_no_ieee:
-; GCN:       ; %bb.0: ; %entry
-; GCN-NEXT:    v_mul_f32_e32 v0, 0x41700000, v0
-; GCN-NEXT:    ; return to shader part epilog
 entry:
   %v = fmul nnan float %arg, 15.0
   %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
@@ -1553,73 +608,6 @@ entry:
 ; GCN-NOT: v_mul
 ; GCN: ; return
 define amdgpu_ps float @test_fold_canonicalize_fdiv_value_f32_no_ieee(float %arg0) {
-; VI-FLUSH-LABEL: test_fold_canonicalize_fdiv_value_f32_no_ieee:
-; VI-FLUSH:       ; %bb.0: ; %entry
-; VI-FLUSH-NEXT:    s_mov_b32 s2, 0x41700000
-; VI-FLUSH-NEXT:    v_div_scale_f32 v1, s[0:1], v0, v0, s2
-; VI-FLUSH-NEXT:    v_div_scale_f32 v2, vcc, s2, v0, s2
-; VI-FLUSH-NEXT:    v_rcp_f32_e32 v3, v1
-; VI-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; VI-FLUSH-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
-; VI-FLUSH-NEXT:    v_fma_f32 v3, v4, v3, v3
-; VI-FLUSH-NEXT:    v_mul_f32_e32 v4, v2, v3
-; VI-FLUSH-NEXT:    v_fma_f32 v5, -v1, v4, v2
-; VI-FLUSH-NEXT:    v_fma_f32 v4, v5, v3, v4
-; VI-FLUSH-NEXT:    v_fma_f32 v1, -v1, v4, v2
-; VI-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; VI-FLUSH-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
-; VI-FLUSH-NEXT:    v_div_fixup_f32 v0, v1, v0, s2
-; VI-FLUSH-NEXT:    ; return to shader part epilog
-;
-; VI-DENORM-LABEL: test_fold_canonicalize_fdiv_value_f32_no_ieee:
-; VI-DENORM:       ; %bb.0: ; %entry
-; VI-DENORM-NEXT:    s_mov_b32 s2, 0x41700000
-; VI-DENORM-NEXT:    v_div_scale_f32 v1, s[0:1], v0, v0, s2
-; VI-DENORM-NEXT:    v_div_scale_f32 v2, vcc, s2, v0, s2
-; VI-DENORM-NEXT:    v_rcp_f32_e32 v3, v1
-; VI-DENORM-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
-; VI-DENORM-NEXT:    v_fma_f32 v3, v4, v3, v3
-; VI-DENORM-NEXT:    v_mul_f32_e32 v4, v2, v3
-; VI-DENORM-NEXT:    v_fma_f32 v5, -v1, v4, v2
-; VI-DENORM-NEXT:    v_fma_f32 v4, v5, v3, v4
-; VI-DENORM-NEXT:    v_fma_f32 v1, -v1, v4, v2
-; VI-DENORM-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
-; VI-DENORM-NEXT:    v_div_fixup_f32 v0, v1, v0, s2
-; VI-DENORM-NEXT:    ; return to shader part epilog
-;
-; GFX9-DENORM-LABEL: test_fold_canonicalize_fdiv_value_f32_no_ieee:
-; GFX9-DENORM:       ; %bb.0: ; %entry
-; GFX9-DENORM-NEXT:    s_mov_b32 s2, 0x41700000
-; GFX9-DENORM-NEXT:    v_div_scale_f32 v1, s[0:1], v0, v0, s2
-; GFX9-DENORM-NEXT:    v_div_scale_f32 v2, vcc, s2, v0, s2
-; GFX9-DENORM-NEXT:    v_rcp_f32_e32 v3, v1
-; GFX9-DENORM-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
-; GFX9-DENORM-NEXT:    v_fma_f32 v3, v4, v3, v3
-; GFX9-DENORM-NEXT:    v_mul_f32_e32 v4, v2, v3
-; GFX9-DENORM-NEXT:    v_fma_f32 v5, -v1, v4, v2
-; GFX9-DENORM-NEXT:    v_fma_f32 v4, v5, v3, v4
-; GFX9-DENORM-NEXT:    v_fma_f32 v1, -v1, v4, v2
-; GFX9-DENORM-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
-; GFX9-DENORM-NEXT:    v_div_fixup_f32 v0, v1, v0, s2
-; GFX9-DENORM-NEXT:    ; return to shader part epilog
-;
-; GFX9-FLUSH-LABEL: test_fold_canonicalize_fdiv_value_f32_no_ieee:
-; GFX9-FLUSH:       ; %bb.0: ; %entry
-; GFX9-FLUSH-NEXT:    s_mov_b32 s2, 0x41700000
-; GFX9-FLUSH-NEXT:    v_div_scale_f32 v1, s[0:1], v0, v0, s2
-; GFX9-FLUSH-NEXT:    v_div_scale_f32 v2, vcc, s2, v0, s2
-; GFX9-FLUSH-NEXT:    v_rcp_f32_e32 v3, v1
-; GFX9-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; GFX9-FLUSH-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
-; GFX9-FLUSH-NEXT:    v_fma_f32 v3, v4, v3, v3
-; GFX9-FLUSH-NEXT:    v_mul_f32_e32 v4, v2, v3
-; GFX9-FLUSH-NEXT:    v_fma_f32 v5, -v1, v4, v2
-; GFX9-FLUSH-NEXT:    v_fma_f32 v4, v5, v3, v4
-; GFX9-FLUSH-NEXT:    v_fma_f32 v1, -v1, v4, v2
-; GFX9-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; GFX9-FLUSH-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
-; GFX9-FLUSH-NEXT:    v_div_fixup_f32 v0, v1, v0, s2
-; GFX9-FLUSH-NEXT:    ; return to shader part epilog
 entry:
   %v = fdiv float 15.0, %arg0
   %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
@@ -1634,33 +622,6 @@ entry:
 ; VI-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
 ; GFX9-FLUSH: v_max_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f32(ptr addrspace(1) %arg, ptr addrspace(1) %out) #1 {
-; VI-LABEL: test_fold_canonicalize_load_nnan_value_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v0, v[0:1]
-; VI-NEXT:    v_mov_b32_e32 v1, s3
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_mul_f32_e32 v3, 1.0, v0
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_store_dword v[0:1], v3
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_load_nnan_value_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[2:3]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %v = load float, ptr addrspace(1) %gep, align 4
@@ -1676,33 +637,6 @@ define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f32(ptr addrsp
 ; GCN-NOT: v_mul_
 ; GCN-NOT: v_max_
 define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f64(ptr addrspace(1) %arg, ptr addrspace(1) %out) #1 {
-; VI-LABEL: test_fold_canonicalize_load_nnan_value_f64:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    v_add_u32_e32 v2, vcc, s2, v2
-; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_load_nnan_value_f64:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dwordx2 v[0:1], v2, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds double, ptr addrspace(1) %arg, i32 %id
   %v = load double, ptr addrspace(1) %gep, align 8
@@ -1717,33 +651,6 @@ define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f64(ptr addrsp
 ; GCN: v_max_f16_e32 [[V2:v[0-9]+]], [[V1]], [[V1]]
 ; GCN: {{flat|global}}_store_short v{{.+}}, [[V2]]
 define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f16(ptr addrspace(1) %arg, ptr addrspace(1) %out) #1 {
-; VI-LABEL: test_fold_canonicalize_load_nnan_value_f16:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v2, 1, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_ushort v0, v[0:1]
-; VI-NEXT:    v_mov_b32_e32 v1, s3
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_max_f16_e32 v3, v0, v0
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_store_short v[0:1], v3
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_load_nnan_value_f16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_ushort v1, v0, s[0:1]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX9-NEXT:    global_store_short v0, v1, s[2:3]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds half, ptr addrspace(1) %arg, i32 %id
   %v = load half, ptr addrspace(1) %gep, align 2
@@ -1760,45 +667,6 @@ define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f16(ptr addrsp
 ; GCN-NOT: v_mul_
 ; GCN-NOT: v_max_
 define amdgpu_kernel void @test_fold_canonicalize_select_value_f32(ptr addrspace(1) %arg) {
-; VI-LABEL: test_fold_canonicalize_select_value_f32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v2, v[0:1] glc
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    flat_load_dword v3, v[0:1] glc
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    flat_load_dword v4, v[0:1] glc
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_add_f32_e32 v2, 0x41700000, v2
-; VI-NEXT:    v_add_f32_e32 v3, 0x42000000, v3
-; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
-; VI-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
-; VI-NEXT:    flat_store_dword v[0:1], v2
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_fold_canonicalize_select_value_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[0:1] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[0:1] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    ; kill: killed $vgpr0_vgpr1
-; GFX9-NEXT:    global_load_dword v3, v[0:1], off glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_add_f32_e32 v1, 0x41700000, v1
-; GFX9-NEXT:    v_add_f32_e32 v2, 0x42000000, v2
-; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
   %load0 = load volatile float, ptr addrspace(1) %gep, align 4
@@ -1831,21 +699,6 @@ define amdgpu_kernel void @test_fold_canonicalize_select_value_f32(ptr addrspace
 ; VI-DENORM: v_min_f32_e32 v0, v0, v1
 ; VI-DENORM-NEXT: ; return
 define amdgpu_ps float @test_fold_canonicalize_minnum_value_no_ieee_mode(float %arg0, float %arg1) {
-; VI-FLUSH-LABEL: test_fold_canonicalize_minnum_value_no_ieee_mode:
-; VI-FLUSH:       ; %bb.0:
-; VI-FLUSH-NEXT:    v_min_f32_e32 v0, v0, v1
-; VI-FLUSH-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; VI-FLUSH-NEXT:    ; return to shader part epilog
-;
-; VI-DENORM-LABEL: test_fold_canonicalize_minnum_value_no_ieee_mode:
-; VI-DENORM:       ; %bb.0:
-; VI-DENORM-NEXT:    v_min_f32_e32 v0, v0, v1
-; VI-DENORM-NEXT:    ; return to shader part epilog
-;
-; GFX9-LABEL: test_fold_canonicalize_minnum_value_no_ieee_mode:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT:    ; return to shader part epilog
   %v = tail call float @llvm.minnum.f32(float %arg0, float %arg1)
   %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
   ret float %canonicalized
@@ -1861,21 +714,6 @@ define amdgpu_ps float @test_fold_canonicalize_minnum_value_no_ieee_mode(float %
 
 ; VI-NEXT: s_setpc_b64
 define float @test_fold_canonicalize_minnum_value_ieee_mode(float %arg0, float %arg1) {
-; VI-LABEL: test_fold_canonicalize_minnum_value_ieee_mode:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; VI-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; VI-NEXT:    v_min_f32_e32 v0, v0, v1
-; VI-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: test_fold_canonicalize_minnum_value_ieee_mode:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %v = tail call float @llvm.minnum.f32(float %arg0, float %arg1)
   %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
   ret float %canonicalized
@@ -1887,21 +725,6 @@ define float @test_fold_canonicalize_minnum_value_ieee_mode(float %arg0, float %
 ; VI-FLUSH-NEXT: v_mul_f32_e32 v0, 1.0, v0
 ; GCN-NEXT: ; return
 define amdgpu_ps float @test_fold_canonicalize_minnum_value_no_ieee_mode_nnan(float %arg0, float %arg1) #1 {
-; VI-FLUSH-LABEL: test_fold_canonicalize_minnum_value_no_ieee_mode_nnan:
-; VI-FLUSH:       ; %bb.0:
-; VI-FLUSH-NEXT:    v_min_f32_e32 v0, v0, v1
-; VI-FLUSH-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; VI-FLUSH-NEXT:    ; return to shader part epilog
-;
-; VI-DENORM-LABEL: test_fold_canonicalize_minnum_value_no_ieee_mode_nnan:
-; VI-DENORM:       ; %bb.0:
-; VI-DENORM-NEXT:    v_min_f32_e32 v0, v0, v1
-; VI-DENORM-NEXT:    ; return to shader part epilog
-;
-; GFX9-LABEL: test_fold_canonicalize_minnum_value_no_ieee_mode_nnan:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT:    ; return to shader part epilog
   %v = tail call float @llvm.minnum.f32(float %arg0, float %arg1)
   %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
   ret float %canonicalized
@@ -1913,22 +736,6 @@ define amdgpu_ps float @test_fold_canonicalize_minnum_value_no_ieee_mode_nnan(fl
 ; GFX9-NOT: v_max
 ; GFX9-NOT: v_pk_max
 define <2 x half> @v_test_canonicalize_build_vector_v2f16(<2 x half> %vec) {
-; VI-LABEL: v_test_canonicalize_build_vector_v2f16:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v2, 0x4400
-; VI-NEXT:    v_add_f16_e32 v1, 1.0, v0
-; VI-NEXT:    v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT:    v_or_b32_e32 v0, v1, v0
-; VI-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_test_canonicalize_build_vector_v2f16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_add_f16_e32 v1, 1.0, v0
-; GFX9-NEXT:    v_mul_f16_e32 v0, 4.0, v0
-; GFX9-NEXT:    v_pack_b32_f16 v0, v1, v0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %lo = extractelement <2 x half> %vec, i32 0
   %hi = extractelement <2 x half> %vec, i32 1
   %lo.op = fadd half %lo, 1.0
@@ -1943,22 +750,6 @@ define <2 x half> @v_test_canonicalize_build_vector_v2f16(<2 x half> %vec) {
 ; GFX9: v_add_f16_e32
 ; GFX9: v_pk_max
 define <2 x half> @v_test_canonicalize_build_vector_noncanon1_v2f16(<2 x half> %vec) {
-; VI-LABEL: v_test_canonicalize_build_vector_noncanon1_v2f16:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_add_f16_e32 v1, 1.0, v0
-; VI-NEXT:    v_max_f16_sdwa v0, v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT:    v_or_b32_e32 v0, v1, v0
-; VI-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_test_canonicalize_build_vector_noncanon1_v2f16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_add_f16_e32 v1, 1.0, v0
-; GFX9-NEXT:    s_mov_b32 s4, 0xffff
-; GFX9-NEXT:    v_bfi_b32 v0, s4, v1, v0
-; GFX9-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %lo = extractelement <2 x half> %vec, i32 0
   %lo.op = fadd half %lo, 1.0
   %ins = insertelement <2 x half> %vec, half %lo.op, i32 0
@@ -1970,24 +761,6 @@ define <2 x half> @v_test_canonicalize_build_vector_noncanon1_v2f16(<2 x half> %
 ; GFX9: v_add_f16_sdwa
 ; GFX9: v_pk_max
 define <2 x half> @v_test_canonicalize_build_vector_noncanon0_v2f16(<2 x half> %vec) {
-; VI-LABEL: v_test_canonicalize_build_vector_noncanon0_v2f16:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, 0x3c00
-; VI-NEXT:    v_add_f16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-NEXT:    v_max_f16_e32 v0, v0, v0
-; VI-NEXT:    v_or_b32_e32 v0, v0, v1
-; VI-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_test_canonicalize_build_vector_noncanon0_v2f16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v1, 0x3c00
-; GFX9-NEXT:    v_add_f16_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX9-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s4
-; GFX9-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %hi = extractelement <2 x half> %vec, i32 1
   %hi.op = fadd half %hi, 1.0
   %ins = insertelement <2 x half> %vec, half %hi.op, i32 1
@@ -2000,11 +773,6 @@ define <2 x half> @v_test_canonicalize_build_vector_noncanon0_v2f16(<2 x half> %
 ; GFX9-NEXT: v_mul_f16_e32 v0, 4.0, v0
 ; GFX9-NEXT: s_setpc_b64
 define half @v_test_canonicalize_extract_element_v2f16(<2 x half> %vec) {
-; GCN-LABEL: v_test_canonicalize_extract_element_v2f16:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_f16_e32 v0, 4.0, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
   %vec.op = fmul <2 x half> %vec, <half 4.0, half 4.0>
   %elt = extractelement <2 x half> %vec.op, i32 0
   %canonicalized = call half @llvm.canonicalize.f16(half %elt)
@@ -2024,32 +792,6 @@ define half @v_test_canonicalize_extract_element_v2f16(<2 x half> %vec) {
 ; GFX9: v_pk_max_f16 v0, v0, v0
 ; GFX9-NEXT: s_setpc_b64
 define <2 x half> @v_test_canonicalize_insertelement_noncanon_vec_v2f16(<2 x half> %vec, half %val, i32 %idx) {
-; VI-LABEL: v_test_canonicalize_insertelement_noncanon_vec_v2f16:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_mul_f16_e32 v1, 0x4800, v1
-; VI-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
-; VI-NEXT:    v_lshlrev_b32_e32 v2, 4, v2
-; VI-NEXT:    s_mov_b32 s4, 0xffff
-; VI-NEXT:    v_or_b32_e32 v1, v1, v3
-; VI-NEXT:    v_lshlrev_b32_e64 v2, v2, s4
-; VI-NEXT:    v_bfi_b32 v0, v2, v1, v0
-; VI-NEXT:    v_max_f16_sdwa v1, v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT:    v_max_f16_e32 v0, v0, v0
-; VI-NEXT:    v_or_b32_e32 v0, v0, v1
-; VI-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_test_canonicalize_insertelement_noncanon_vec_v2f16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_mul_f16_e32 v1, 0x4800, v1
-; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 4, v2
-; GFX9-NEXT:    s_mov_b32 s4, 0xffff
-; GFX9-NEXT:    v_pack_b32_f16 v1, v1, v1
-; GFX9-NEXT:    v_lshlrev_b32_e64 v2, v2, s4
-; GFX9-NEXT:    v_bfi_b32 v0, v2, v1, v0
-; GFX9-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %ins.op = fmul half %val, 8.0
   %ins = insertelement <2 x half> %vec, half %ins.op, i32 %idx
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins)
@@ -2061,37 +803,6 @@ define <2 x half> @v_test_canonicalize_insertelement_noncanon_vec_v2f16(<2 x hal
 ; GFX9: v_pk_max_f16 v0, v0, v0
 ; GFX9-NEXT: s_setpc_b64
 define <2 x half> @v_test_canonicalize_insertelement_noncanon_insval_v2f16(<2 x half> %vec, half %val, i32 %idx) {
-; VI-LABEL: v_test_canonicalize_insertelement_noncanon_insval_v2f16:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v3, 0x4400
-; VI-NEXT:    v_mul_f16_sdwa v3, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-NEXT:    v_mul_f16_e32 v0, 4.0, v0
-; VI-NEXT:    v_or_b32_e32 v0, v0, v3
-; VI-NEXT:    v_mov_b32_e32 v3, 16
-; VI-NEXT:    s_mov_b32 s4, 0xffff
-; VI-NEXT:    v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; VI-NEXT:    v_lshlrev_b32_e32 v2, 4, v2
-; VI-NEXT:    v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NEXT:    v_lshlrev_b32_e64 v2, v2, s4
-; VI-NEXT:    v_bfi_b32 v0, v2, v1, v0
-; VI-NEXT:    v_max_f16_sdwa v1, v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT:    v_max_f16_e32 v0, v0, v0
-; VI-NEXT:    v_or_b32_e32 v0, v0, v1
-; VI-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_test_canonicalize_insertelement_noncanon_insval_v2f16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX9-NEXT:    v_perm_b32 v1, v1, v1, s4
-; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 4, v2
-; GFX9-NEXT:    s_mov_b32 s4, 0xffff
-; GFX9-NEXT:    v_pk_mul_f16 v0, v0, 4.0 op_sel_hi:[1,0]
-; GFX9-NEXT:    v_lshlrev_b32_e64 v2, v2, s4
-; GFX9-NEXT:    v_bfi_b32 v0, v2, v1, v0
-; GFX9-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %vec.op = fmul <2 x half> %vec, <half 4.0, half 4.0>
   %ins = insertelement <2 x half> %vec.op, half %val, i32 %idx
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins)
@@ -2109,11 +820,6 @@ define <2 x half> @v_test_canonicalize_insertelement_noncanon_insval_v2f16(<2 x
 ; GCN-NEXT: v_cubeid_f32 v0, v0, v1, v2
 ; GCN-NEXT: s_setpc_b64
 define float @v_test_canonicalize_cubeid(float %a, float %b, float %c) {
-; GCN-LABEL: v_test_canonicalize_cubeid:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_cubeid_f32 v0, v0, v1, v2
-; GCN-NEXT:    s_setpc_b64 s[30:31]
   %cvt = call float @llvm.amdgcn.cubeid(float %a, float %b, float %c)
   %canonicalized = call float @llvm.canonicalize.f32(float %cvt)
   ret float %canonicalized
@@ -2124,11 +830,6 @@ define float @v_test_canonicalize_cubeid(float %a, float %b, float %c) {
 ; GCN-NEXT: v_frexp_mant_f32_e32 v0, v0
 ; GCN-NEXT: s_setpc_b64
 define float @v_test_canonicalize_frexp_mant(float %a) {
-; GCN-LABEL: v_test_canonicalize_frexp_mant:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_frexp_mant_f32_e32 v0, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
   %cvt = call float @llvm.amdgcn.frexp.mant.f32(float %a)
   %canonicalized = call float @llvm.canonicalize.f32(float %cvt)
   ret float %canonicalized
@@ -2139,11 +840,6 @@ define float @v_test_canonicalize_frexp_mant(float %a) {
 ; GCN-NEXT: v_log_f32
 ; GCN-NEXT: s_setpc_b64
 define float @v_test_canonicalize_amdgcn_log(float %a) {
-; GCN-LABEL: v_test_canonicalize_amdgcn_log:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_log_f32_e32 v0, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
   %log = call float @llvm.amdgcn.log.f32(float %a)
   %canonicalized = call float @llvm.canonicalize.f32(float %log)
   ret float %canonicalized
@@ -2154,11 +850,6 @@ define float @v_test_canonicalize_amdgcn_log(float %a) {
 ; GCN-NEXT: v_exp_f32
 ; GCN-NEXT: s_setpc_b64
 define float @v_test_canonicalize_amdgcn_exp2(float %a) {
-; GCN-LABEL: v_test_canonicalize_amdgcn_exp2:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_exp_f32_e32 v0, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
   %log = call float @llvm.amdgcn.exp2.f32(float %a)
   %canonicalized = call float @llvm.canonicalize.f32(float %log)
   ret float %canonicalized
@@ -2166,43 +857,14 @@ define float @v_test_canonicalize_amdgcn_exp2(float %a) {
 
 ; GCN-LABEL: {{^}}v_test_canonicalize_minimum:
 ; GCN: s_waitcnt
+; GCN-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
+; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
 ; GCN-NEXT: v_min_f32_e32 [[MIN:v[0-9]+]], v0, v1
-; GCN-NEXT: v_mov_b32_e32 [[K:v[0-9]+]], 0x7fc00000
-; GCN-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GCN-NEXT: v_cndmask_b32_e32 v0, [[K]], [[MIN]], vcc
 ; VI-FLUSH-NEXT: v_mul_f32_e32 v0, 1.0, v0
 ; GCN-NEXT: s_setpc_b64
 define float @v_test_canonicalize_minimum(float %a, float %b) {
-; VI-FLUSH-LABEL: v_test_canonicalize_minimum:
-; VI-FLUSH:       ; %bb.0:
-; VI-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-FLUSH-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
-; VI-FLUSH-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; VI-FLUSH-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
-; VI-FLUSH-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
-; VI-FLUSH-NEXT:    v_min_f32_e32 v0, v0, v1
-; VI-FLUSH-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; VI-FLUSH-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-DENORM-LABEL: v_test_canonicalize_minimum:
-; VI-DENORM:       ; %bb.0:
-; VI-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-DENORM-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
-; VI-DENORM-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; VI-DENORM-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
-; VI-DENORM-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
-; VI-DENORM-NEXT:    v_min_f32_e32 v0, v0, v1
-; VI-DENORM-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_test_canonicalize_minimum:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
-; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimum.f32(float %a, float %b)
   %canonicalized = call float @llvm.canonicalize.f32(float %min)
   ret float %canonicalized
@@ -2210,43 +872,14 @@ define float @v_test_canonicalize_minimum(float %a, float %b) {
 
 ; GCN-LABEL: {{^}}v_test_canonicalize_maximum:
 ; GCN: s_waitcnt
-; GCN-NEXT: v_max_f32_e32 [[MIN:v[0-9]+]], v0, v1
-; GCN-NEXT: v_mov_b32_e32 [[K:v[0-9]+]], 0x7fc00000
-; GCN-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GCN-NEXT: v_cndmask_b32_e32 v0, [[K]], [[MIN]], vcc
+; GCN-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
+; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
+; GCN-NEXT: v_max_f32_e32 [[MAX:v[0-9]+]], v0, v1
 ; VI-FLUSH-NEXT: v_mul_f32_e32 v0, 1.0, v0
 ; GCN-NEXT: s_setpc_b64
 define float @v_test_canonicalize_maximum(float %a, float %b) {
-; VI-FLUSH-LABEL: v_test_canonicalize_maximum:
-; VI-FLUSH:       ; %bb.0:
-; VI-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-FLUSH-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
-; VI-FLUSH-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; VI-FLUSH-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
-; VI-FLUSH-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
-; VI-FLUSH-NEXT:    v_max_f32_e32 v0, v0, v1
-; VI-FLUSH-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; VI-FLUSH-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-DENORM-LABEL: v_test_canonicalize_maximum:
-; VI-DENORM:       ; %bb.0:
-; VI-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-DENORM-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
-; VI-DENORM-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; VI-DENORM-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
-; VI-DENORM-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
-; VI-DENORM-NEXT:    v_max_f32_e32 v0, v0, v1
-; VI-DENORM-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_test_canonicalize_maximum:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
-; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.maximum.f32(float %a, float %b)
   %canonicalized = call float @llvm.canonicalize.f32(float %min)
   ret float %canonicalized
@@ -2263,21 +896,6 @@ define float @v_test_canonicalize_maximum(float %a, float %b) {
 ; GCN-NEXT: v_min_f32_e32 v0, v0, v1
 ; GCN-NEXT: s_setpc_b64
 define float @v_test_canonicalize_minimumnum(float %a, float %b) {
-; VI-LABEL: v_test_canonicalize_minimumnum:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; VI-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; VI-NEXT:    v_min_f32_e32 v0, v0, v1
-; VI-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_test_canonicalize_minimumnum:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimumnum.f32(float %a, float %b)
   %canonicalized = call float @llvm.canonicalize.f32(float %min)
   ret float %canonicalized
@@ -2294,21 +912,6 @@ define float @v_test_canonicalize_minimumnum(float %a, float %b) {
 ; GCN-NEXT: v_max_f32_e32 v0, v0, v1
 ; GCN-NEXT: s_setpc_b64
 define float @v_test_canonicalize_maximumnum(float %a, float %b) {
-; VI-LABEL: v_test_canonicalize_maximumnum:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; VI-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; VI-NEXT:    v_max_f32_e32 v0, v0, v1
-; VI-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_test_canonicalize_maximumnum:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.maximumnum.f32(float %a, float %b)
   %canonicalized = call float @llvm.canonicalize.f32(float %min)
   ret float %canonicalized
@@ -2348,6 +951,3 @@ declare float @llvm.amdgcn.exp2.f32(float) #0
 attributes #0 = { nounwind readnone }
 attributes #1 = { "no-nans-fp-math"="true" }
 attributes #2 = { "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GCN-DENORM: {{.*}}
-; GCN-FLUSH: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index bfb24a1970a8f..cf6fa6ba1cf26 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -4440,13 +4440,6 @@ define float @v_fmul_0_fsub_0_safe_infloop_regression(float %arg) {
 ; SI-NSZ-NEXT:    s_brev_b32 s4, 1
 ; SI-NSZ-NEXT:    v_fma_f32 v0, v0, s4, 0
 ; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-LABEL: v_fmul_0_fsub_0_safe_infloop_regression:
-; VI:       ; %bb.0: ; %bb
-; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_mul_f32_e32 v0, 0, v0
-; VI-NEXT:    v_sub_f32_e32 v0, 0, v0
-; VI-NEXT:    s_setpc_b64 s[30:31]
 ; FIXME: utils/update_llc_test_checks.py will generate redundant VI
 ; labels, remove them, they will cause test failure.
 bb:



More information about the llvm-commits mailing list