[llvm] [SDAG] Drop select -> fmax/min folding in SelectionDAGBuilder (PR #93575)

Sun Jan 18 12:43:54 PST 2026

https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/93575

>From 47733cae5e4b8919702d8f5a1de513bbdb2dc4b4 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Tue, 28 May 2024 23:45:55 +0800
Subject: [PATCH 1/4] [RISCV][SDAG] Add pre-commit tests for PR93414. NFC.

---
 llvm/test/CodeGen/RISCV/float-select-fcmp.ll | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll
index f08777ac3e5de..62e3d1d1c34e5 100644
--- a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll
@@ -490,3 +490,21 @@ entry:
   %retval.0 = select i1 %cmp, float 1.000000e+00, float %.a
   ret float %retval.0
 }
+
+; Test from PR93414
+; Make sure that we don't use fmin.s here to handle signed zero correctly.
+define float @select_fcmp_olt_pos_zero(float %x) {
+; CHECK-LABEL: select_fcmp_olt_pos_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.w.x fa5, zero
+; CHECK-NEXT:    fmin.s fa0, fa0, fa5
+; CHECK-NEXT:    ret
+;
+; CHECKZFINX-LABEL: select_fcmp_olt_pos_zero:
+; CHECKZFINX:       # %bb.0:
+; CHECKZFINX-NEXT:    fmin.s a0, a0, zero
+; CHECKZFINX-NEXT:    ret
+  %cmp = fcmp olt float %x, 0.000000
+  %sel = select i1 %cmp, float %x, float 0.000000
+  ret float %sel
+}

>From 81366134480626e8734866a37d2ec868a565aad2 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 19 Jan 2026 01:39:53 +0800
Subject: [PATCH 2/4] [SDAG] Drop select -> fmax/min folding in
 SelectionDAGBuilder

---
 .../SelectionDAG/SelectionDAGBuilder.cpp      | 26 -------------------
 1 file changed, 26 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 27131e14141cc..93c46b8225112 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3864,32 +3864,6 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
     case SPF_UMIN:    Opc = ISD::UMIN; break;
     case SPF_SMAX:    Opc = ISD::SMAX; break;
     case SPF_SMIN:    Opc = ISD::SMIN; break;
-    case SPF_FMINNUM:
-      switch (SPR.NaNBehavior) {
-      case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
-      case SPNB_RETURNS_NAN: break;
-      case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
-      case SPNB_RETURNS_ANY:
-        if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ||
-            (UseScalarMinMax &&
-             TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType())))
-          Opc = ISD::FMINNUM;
-        break;
-      }
-      break;
-    case SPF_FMAXNUM:
-      switch (SPR.NaNBehavior) {
-      case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
-      case SPNB_RETURNS_NAN: break;
-      case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
-      case SPNB_RETURNS_ANY:
-        if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) ||
-            (UseScalarMinMax &&
-             TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType())))
-          Opc = ISD::FMAXNUM;
-        break;
-      }
-      break;
     case SPF_NABS:
       Negate = true;
       [[fallthrough]];

>From a2a574543c1e44d62ff37fc10d9a59428f021870 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 19 Jan 2026 03:57:49 +0800
Subject: [PATCH 3/4] [CodeGen] Fix trivial tests. NFC.

---
 llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll  |   4 +-
 llvm/test/CodeGen/AArch64/arm64-fmax.ll       |   6 +-
 llvm/test/CodeGen/AArch64/select_fmf.ll       |  20 +--
 .../test/CodeGen/AArch64/sve-pred-selectop.ll |   8 +-
 llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll   | 135 +++++++++--------
 llvm/test/CodeGen/AMDGPU/fmed3.ll             |   4 +-
 llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll   | 135 +++++++++--------
 llvm/test/CodeGen/AMDGPU/reduction.ll         |  24 +--
 .../AMDGPU/select-flags-to-fmin-fmax.ll       | 138 ++++++++++--------
 llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll  |  28 ++--
 .../test/CodeGen/ARM/fp16-vminmaxnm-vector.ll |  64 ++++----
 .../CodeGen/ARM/minnum-maxnum-intrinsics.ll   |   2 +-
 llvm/test/CodeGen/ARM/vminmaxnm-safe.ll       |  40 ++---
 llvm/test/CodeGen/PowerPC/vec-min-max.ll      |   8 +-
 llvm/test/CodeGen/RISCV/float-select-fcmp.ll  |  12 +-
 llvm/test/CodeGen/SystemZ/vec-max-05.ll       |   6 +-
 .../CodeGen/SystemZ/vec-max-min-zerosplat.ll  |   8 +-
 llvm/test/CodeGen/SystemZ/vec-min-05.ll       |   6 +-
 llvm/test/CodeGen/Thumb2/mve-minmax.ll        |  80 ++++++++--
 llvm/test/CodeGen/Thumb2/mve-pred-selectop.ll |   8 +-
 .../test/CodeGen/Thumb2/mve-pred-selectop2.ll |  16 +-
 .../test/CodeGen/Thumb2/mve-pred-selectop3.ll |   8 +-
 .../CodeGen/Thumb2/mve-vecreduce-fminmax.ll   |  40 ++---
 23 files changed, 442 insertions(+), 358 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
index aec31eced397e..c95024c8eacec 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
@@ -27,7 +27,7 @@ define double @test_cross(float %in) {
 define double @test_cross_fail_nan(float %in) {
 ; CHECK-LABEL: test_cross_fail_nan:
   %cmp = fcmp olt float %in, 0.000000e+00
-  %val = select i1 %cmp, float %in, float 0.000000e+00
+  %val = select nnan nsz i1 %cmp, float %in, float 0.000000e+00
   %longer = fpext float %val to double
   ret double %longer
 
@@ -68,7 +68,7 @@ define float @minnum_fcmp_nsz(float %x, float %y) {
 define float @minnum_select_nsz(float %x, float %y) {
 ; CHECK-LABEL: minnum_select_nsz:
   %cmp = fcmp nnan ole float %x, %y
-  %sel = select nsz i1 %cmp, float %x, float %y
+  %sel = select nnan nsz i1 %cmp, float %x, float %y
   ret float %sel
 ; CHECK: fminnm s0, s0, s1
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax.ll b/llvm/test/CodeGen/AArch64/arm64-fmax.ll
index 85104775339b6..3a4eb5426e6dd 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fmax.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fmax.ll
@@ -9,7 +9,7 @@ define double @test_direct(float %in) {
 ; CHECK-NEXT:    fcvt d0, s0
 ; CHECK-NEXT:    ret
   %cmp = fcmp nnan olt float %in, 0.000000e+00
-  %val = select i1 %cmp, float 0.000000e+00, float %in
+  %val = select nsz i1 %cmp, float 0.000000e+00, float %in
   %longer = fpext float %val to double
   ret double %longer
 }
@@ -22,7 +22,7 @@ define double @test_cross(float %in) {
 ; CHECK-NEXT:    fcvt d0, s0
 ; CHECK-NEXT:    ret
   %cmp = fcmp nnan ult float %in, 0.000000e+00
-  %val = select i1 %cmp, float %in, float 0.000000e+00
+  %val = select nsz i1 %cmp, float %in, float 0.000000e+00
   %longer = fpext float %val to double
   ret double %longer
 }
@@ -37,7 +37,7 @@ define double @test_cross_fail_nan(float %in) {
 ; CHECK-NEXT:    fcvt d0, s0
 ; CHECK-NEXT:    ret
   %cmp = fcmp nnan olt float %in, 0.000000e+00
-  %val = select i1 %cmp, float %in, float 0.000000e+00
+  %val = select nsz i1 %cmp, float %in, float 0.000000e+00
   %longer = fpext float %val to double
   ret double %longer
 }
diff --git a/llvm/test/CodeGen/AArch64/select_fmf.ll b/llvm/test/CodeGen/AArch64/select_fmf.ll
index 88f517af65bb6..333e25fb5b824 100644
--- a/llvm/test/CodeGen/AArch64/select_fmf.ll
+++ b/llvm/test/CodeGen/AArch64/select_fmf.ll
@@ -8,12 +8,12 @@
 define float @select_select_fold_select_and(float %w, float %x, float %y, float %z) {
 ; CHECK-SD-LABEL: select_select_fold_select_and:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fminnm s4, s1, s2
 ; CHECK-SD-NEXT:    fcmp s1, s2
+; CHECK-SD-NEXT:    fmov s4, #0.50000000
+; CHECK-SD-NEXT:    fcsel s1, s1, s2, lt
 ; CHECK-SD-NEXT:    fmaxnm s2, s0, s3
-; CHECK-SD-NEXT:    fmov s1, #0.50000000
-; CHECK-SD-NEXT:    fccmp s4, s0, #4, lt
-; CHECK-SD-NEXT:    fadd s1, s0, s1
+; CHECK-SD-NEXT:    fccmp s1, s0, #4, lt
+; CHECK-SD-NEXT:    fadd s1, s0, s4
 ; CHECK-SD-NEXT:    fcsel s2, s2, s0, gt
 ; CHECK-SD-NEXT:    fadd s4, s1, s2
 ; CHECK-SD-NEXT:    fcmp s4, s1
@@ -98,13 +98,13 @@ exit:                                     ; preds = %if.end.i159.i.i, %if.then.i
 define float @select_select_fold_select_or(float %w, float %x, float %y, float %z) {
 ; CHECK-SD-LABEL: select_select_fold_select_or:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fminnm s4, s1, s2
 ; CHECK-SD-NEXT:    fcmp s1, s2
-; CHECK-SD-NEXT:    fmaxnm s2, s0, s3
-; CHECK-SD-NEXT:    fmov s1, #0.50000000
-; CHECK-SD-NEXT:    fccmp s4, s0, #0, ge
-; CHECK-SD-NEXT:    fadd s1, s0, s1
-; CHECK-SD-NEXT:    fcsel s2, s0, s2, gt
+; CHECK-SD-NEXT:    fcsel s1, s1, s2, lt
+; CHECK-SD-NEXT:    fccmp s0, s3, #0, ge
+; CHECK-SD-NEXT:    fmov s2, #0.50000000
+; CHECK-SD-NEXT:    fccmp s1, s0, #0, le
+; CHECK-SD-NEXT:    fadd s1, s0, s2
+; CHECK-SD-NEXT:    fcsel s2, s0, s3, gt
 ; CHECK-SD-NEXT:    fadd s4, s1, s2
 ; CHECK-SD-NEXT:    fcmp s4, s1
 ; CHECK-SD-NEXT:    b.le .LBB1_2
diff --git a/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll b/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
index 9a78726c450d1..faabf657601a2 100644
--- a/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
@@ -660,7 +660,7 @@ define <vscale x 4 x float> @fcmp_fast_olt_v4f32(<vscale x 4 x float> %z, <vscal
 entry:
   %c = fcmp oeq <vscale x 4 x float> %z, zeroinitializer
   %a1 = fcmp fast olt <vscale x 4 x float> %x, %y
-  %a = select <vscale x 4 x i1> %a1, <vscale x 4 x float> %x, <vscale x 4 x float> %y
+  %a = select nnan nsz <vscale x 4 x i1> %a1, <vscale x 4 x float> %x, <vscale x 4 x float> %y
   %b = select <vscale x 4 x i1> %c, <vscale x 4 x float> %a, <vscale x 4 x float> %z
   ret <vscale x 4 x float> %b
 }
@@ -676,7 +676,7 @@ define <vscale x 8 x half> @fcmp_fast_olt_v8f16(<vscale x 8 x half> %z, <vscale
 entry:
   %c = fcmp oeq <vscale x 8 x half> %z, zeroinitializer
   %a1 = fcmp fast olt <vscale x 8 x half> %x, %y
-  %a = select <vscale x 8 x i1> %a1, <vscale x 8 x half> %x, <vscale x 8 x half> %y
+  %a = select nnan nsz <vscale x 8 x i1> %a1, <vscale x 8 x half> %x, <vscale x 8 x half> %y
   %b = select <vscale x 8 x i1> %c, <vscale x 8 x half> %a, <vscale x 8 x half> %z
   ret <vscale x 8 x half> %b
 }
@@ -692,7 +692,7 @@ define <vscale x 4 x float> @fcmp_fast_ogt_v4f32(<vscale x 4 x float> %z, <vscal
 entry:
   %c = fcmp oeq <vscale x 4 x float> %z, zeroinitializer
   %a1 = fcmp fast ogt <vscale x 4 x float> %x, %y
-  %a = select <vscale x 4 x i1> %a1, <vscale x 4 x float> %x, <vscale x 4 x float> %y
+  %a = select nnan nsz <vscale x 4 x i1> %a1, <vscale x 4 x float> %x, <vscale x 4 x float> %y
   %b = select <vscale x 4 x i1> %c, <vscale x 4 x float> %a, <vscale x 4 x float> %z
   ret <vscale x 4 x float> %b
 }
@@ -708,7 +708,7 @@ define <vscale x 8 x half> @fcmp_fast_ogt_v8f16(<vscale x 8 x half> %z, <vscale
 entry:
   %c = fcmp oeq <vscale x 8 x half> %z, zeroinitializer
   %a1 = fcmp fast ogt <vscale x 8 x half> %x, %y
-  %a = select <vscale x 8 x i1> %a1, <vscale x 8 x half> %x, <vscale x 8 x half> %y
+  %a = select nnan nsz <vscale x 8 x i1> %a1, <vscale x 8 x half> %x, <vscale x 8 x half> %y
   %b = select <vscale x 8 x i1> %c, <vscale x 8 x half> %a, <vscale x 8 x half> %z
   ret <vscale x 8 x half> %b
 }
diff --git a/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll b/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
index bd28f72bb8913..e6da13ec5953b 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
@@ -67,10 +67,10 @@ define half @test_fmax_legacy_ugt_f16_fast(half %a, half %b) #0 {
 ; SI-LABEL: test_fmax_legacy_ugt_f16_fast:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; SI-NEXT:    v_max_f32_e32 v0, v0, v1
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -168,24 +168,28 @@ define <2 x half> @test_fmax_legacy_ugt_v2f16_fast(<2 x half> %a, <2 x half> %b)
 ; VI-LABEL: test_fmax_legacy_ugt_v2f16_fast:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT:    v_max_f16_e32 v0, v0, v1
-; VI-NEXT:    v_or_b32_e32 v0, v0, v2
+; VI-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; VI-NEXT:    v_cmp_nle_f16_e32 vcc, v3, v2
+; VI-NEXT:    v_cndmask_b32_sdwa v2, v2, v3, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v1
+; VI-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; VI-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-LABEL: test_fmax_legacy_ugt_v2f16_fast:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-NEXT:    v_max_f32_e32 v0, v0, v2
-; SI-NEXT:    v_max_f32_e32 v1, v1, v3
+; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; SI-NEXT:    v_max_legacy_f32_e32 v0, v2, v0
+; SI-NEXT:    v_max_legacy_f32_e32 v1, v3, v1
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_fmax_legacy_ugt_v2f16_fast:
@@ -285,6 +289,8 @@ define <3 x half> @test_fmax_legacy_ugt_v3f16_fast(<3 x half> %a, <3 x half> %b)
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX9-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX9-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX9-NEXT:    v_pk_max_f16 v0, v0, v0
 ; GFX9-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX9-NEXT:    v_pk_max_f16 v0, v0, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -292,39 +298,46 @@ define <3 x half> @test_fmax_legacy_ugt_v3f16_fast(<3 x half> %a, <3 x half> %b)
 ; VI-LABEL: test_fmax_legacy_ugt_v3f16_fast:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT:    v_max_f16_e32 v0, v0, v2
-; VI-NEXT:    v_max_f16_e32 v1, v1, v3
-; VI-NEXT:    v_or_b32_e32 v0, v0, v4
+; VI-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; VI-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; VI-NEXT:    v_cmp_nle_f16_e32 vcc, v5, v4
+; VI-NEXT:    v_cndmask_b32_sdwa v4, v4, v5, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v3
+; VI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v2
+; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-LABEL: test_fmax_legacy_ugt_v3f16_fast:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    v_cvt_f16_f32_e32 v5, v5
 ; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; SI-NEXT:    v_cvt_f16_f32_e32 v4, v4
+; SI-NEXT:    v_cvt_f16_f32_e32 v5, v5
 ; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
+; SI-NEXT:    v_cvt_f16_f32_e32 v4, v4
 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT:    v_cvt_f32_f16_e32 v5, v5
+; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; SI-NEXT:    v_cvt_f32_f16_e32 v4, v4
+; SI-NEXT:    v_cvt_f32_f16_e32 v5, v5
 ; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; SI-NEXT:    v_cvt_f32_f16_e32 v4, v4
 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-NEXT:    v_max_f32_e32 v0, v0, v3
-; SI-NEXT:    v_max_f32_e32 v1, v1, v4
-; SI-NEXT:    v_max_f32_e32 v2, v2, v5
+; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; SI-NEXT:    v_max_legacy_f32_e32 v0, v3, v0
+; SI-NEXT:    v_max_legacy_f32_e32 v1, v4, v1
+; SI-NEXT:    v_max_legacy_f32_e32 v2, v5, v2
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_fmax_legacy_ugt_v3f16_fast:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_pk_max_f16 v3, v3, v3
+; GFX11-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX11-NEXT:    v_pk_max_f16 v0, v0, v0
 ; GFX11-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_pk_max_f16 v0, v0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX11-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %cmp = fcmp ugt <3 x half> %a, %b
@@ -461,26 +474,26 @@ define <4 x half> @test_fmax_legacy_ugt_v4f16_fast(<4 x half> %a, <4 x half> %b)
 ; SI-LABEL: test_fmax_legacy_ugt_v4f16_fast:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    v_cvt_f16_f32_e32 v7, v7
 ; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
-; SI-NEXT:    v_cvt_f16_f32_e32 v6, v6
+; SI-NEXT:    v_cvt_f16_f32_e32 v7, v7
 ; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; SI-NEXT:    v_cvt_f16_f32_e32 v5, v5
+; SI-NEXT:    v_cvt_f16_f32_e32 v6, v6
 ; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; SI-NEXT:    v_cvt_f16_f32_e32 v4, v4
+; SI-NEXT:    v_cvt_f16_f32_e32 v5, v5
 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT:    v_cvt_f32_f16_e32 v7, v7
+; SI-NEXT:    v_cvt_f16_f32_e32 v4, v4
 ; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; SI-NEXT:    v_cvt_f32_f16_e32 v6, v6
+; SI-NEXT:    v_cvt_f32_f16_e32 v7, v7
 ; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; SI-NEXT:    v_cvt_f32_f16_e32 v5, v5
+; SI-NEXT:    v_cvt_f32_f16_e32 v6, v6
 ; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-NEXT:    v_cvt_f32_f16_e32 v4, v4
+; SI-NEXT:    v_cvt_f32_f16_e32 v5, v5
 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-NEXT:    v_max_f32_e32 v0, v0, v4
-; SI-NEXT:    v_max_f32_e32 v1, v1, v5
-; SI-NEXT:    v_max_f32_e32 v2, v2, v6
-; SI-NEXT:    v_max_f32_e32 v3, v3, v7
+; SI-NEXT:    v_cvt_f32_f16_e32 v4, v4
+; SI-NEXT:    v_max_legacy_f32_e32 v0, v4, v0
+; SI-NEXT:    v_max_legacy_f32_e32 v1, v5, v1
+; SI-NEXT:    v_max_legacy_f32_e32 v2, v6, v2
+; SI-NEXT:    v_max_legacy_f32_e32 v3, v7, v3
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_fmax_legacy_ugt_v4f16_fast:
@@ -701,46 +714,46 @@ define <8 x half> @test_fmax_legacy_ugt_v8f16_fast(<8 x half> %a, <8 x half> %b)
 ; SI-LABEL: test_fmax_legacy_ugt_v8f16_fast:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    v_cvt_f16_f32_e32 v15, v15
 ; SI-NEXT:    v_cvt_f16_f32_e32 v7, v7
-; SI-NEXT:    v_cvt_f16_f32_e32 v14, v14
+; SI-NEXT:    v_cvt_f16_f32_e32 v15, v15
 ; SI-NEXT:    v_cvt_f16_f32_e32 v6, v6
-; SI-NEXT:    v_cvt_f16_f32_e32 v13, v13
+; SI-NEXT:    v_cvt_f16_f32_e32 v14, v14
 ; SI-NEXT:    v_cvt_f16_f32_e32 v5, v5
-; SI-NEXT:    v_cvt_f16_f32_e32 v12, v12
+; SI-NEXT:    v_cvt_f16_f32_e32 v13, v13
 ; SI-NEXT:    v_cvt_f16_f32_e32 v4, v4
-; SI-NEXT:    v_cvt_f16_f32_e32 v11, v11
+; SI-NEXT:    v_cvt_f16_f32_e32 v12, v12
 ; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
-; SI-NEXT:    v_cvt_f16_f32_e32 v10, v10
+; SI-NEXT:    v_cvt_f16_f32_e32 v11, v11
 ; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; SI-NEXT:    v_cvt_f16_f32_e32 v9, v9
+; SI-NEXT:    v_cvt_f16_f32_e32 v10, v10
 ; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; SI-NEXT:    v_cvt_f16_f32_e32 v8, v8
+; SI-NEXT:    v_cvt_f16_f32_e32 v9, v9
 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT:    v_cvt_f32_f16_e32 v15, v15
+; SI-NEXT:    v_cvt_f16_f32_e32 v8, v8
 ; SI-NEXT:    v_cvt_f32_f16_e32 v7, v7
-; SI-NEXT:    v_cvt_f32_f16_e32 v14, v14
+; SI-NEXT:    v_cvt_f32_f16_e32 v15, v15
 ; SI-NEXT:    v_cvt_f32_f16_e32 v6, v6
-; SI-NEXT:    v_cvt_f32_f16_e32 v13, v13
+; SI-NEXT:    v_cvt_f32_f16_e32 v14, v14
 ; SI-NEXT:    v_cvt_f32_f16_e32 v5, v5
-; SI-NEXT:    v_cvt_f32_f16_e32 v12, v12
+; SI-NEXT:    v_cvt_f32_f16_e32 v13, v13
 ; SI-NEXT:    v_cvt_f32_f16_e32 v4, v4
-; SI-NEXT:    v_cvt_f32_f16_e32 v11, v11
+; SI-NEXT:    v_cvt_f32_f16_e32 v12, v12
 ; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; SI-NEXT:    v_cvt_f32_f16_e32 v10, v10
+; SI-NEXT:    v_cvt_f32_f16_e32 v11, v11
 ; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; SI-NEXT:    v_cvt_f32_f16_e32 v9, v9
+; SI-NEXT:    v_cvt_f32_f16_e32 v10, v10
 ; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-NEXT:    v_cvt_f32_f16_e32 v8, v8
+; SI-NEXT:    v_cvt_f32_f16_e32 v9, v9
 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-NEXT:    v_max_f32_e32 v0, v0, v8
-; SI-NEXT:    v_max_f32_e32 v1, v1, v9
-; SI-NEXT:    v_max_f32_e32 v2, v2, v10
-; SI-NEXT:    v_max_f32_e32 v3, v3, v11
-; SI-NEXT:    v_max_f32_e32 v4, v4, v12
-; SI-NEXT:    v_max_f32_e32 v5, v5, v13
-; SI-NEXT:    v_max_f32_e32 v6, v6, v14
-; SI-NEXT:    v_max_f32_e32 v7, v7, v15
+; SI-NEXT:    v_cvt_f32_f16_e32 v8, v8
+; SI-NEXT:    v_max_legacy_f32_e32 v0, v8, v0
+; SI-NEXT:    v_max_legacy_f32_e32 v1, v9, v1
+; SI-NEXT:    v_max_legacy_f32_e32 v2, v10, v2
+; SI-NEXT:    v_max_legacy_f32_e32 v3, v11, v3
+; SI-NEXT:    v_max_legacy_f32_e32 v4, v12, v4
+; SI-NEXT:    v_max_legacy_f32_e32 v5, v13, v5
+; SI-NEXT:    v_max_legacy_f32_e32 v6, v14, v6
+; SI-NEXT:    v_max_legacy_f32_e32 v7, v15, v7
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_fmax_legacy_ugt_v8f16_fast:
diff --git a/llvm/test/CodeGen/AMDGPU/fmed3.ll b/llvm/test/CodeGen/AMDGPU/fmed3.ll
index 29163c111fc5e..19fa6151c406b 100644
--- a/llvm/test/CodeGen/AMDGPU/fmed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmed3.ll
@@ -1105,11 +1105,11 @@ define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out,
 
   ; fmax_legacy
   %cmp0 = fcmp ule float %a.nnan, 2.0
-  %max = select i1 %cmp0, float 2.0, float %a.nnan
+  %max = select nnan nsz i1 %cmp0, float 2.0, float %a.nnan
 
   ; fmin_legacy
   %cmp1 = fcmp uge float %max, 4.0
-  %med = select i1 %cmp1, float 4.0, float %max
+  %med = select nnan nsz i1 %cmp1, float 4.0, float %max
 
   store float %med, ptr addrspace(1) %outgep
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll b/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
index 40c2ec0a39f51..e7ebe5cfbb189 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
@@ -68,10 +68,10 @@ define half @test_fmin_legacy_ule_f16_fast(half %a, half %b) #0 {
 ; SI-LABEL: test_fmin_legacy_ule_f16_fast:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; SI-NEXT:    v_min_f32_e32 v0, v0, v1
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -169,24 +169,28 @@ define <2 x half> @test_fmin_legacy_ule_v2f16_fast(<2 x half> %a, <2 x half> %b)
 ; VI-LABEL: test_fmin_legacy_ule_v2f16_fast:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT:    v_min_f16_e32 v0, v0, v1
-; VI-NEXT:    v_or_b32_e32 v0, v0, v2
+; VI-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; VI-NEXT:    v_cmp_ngt_f16_e32 vcc, v3, v2
+; VI-NEXT:    v_cndmask_b32_sdwa v2, v2, v3, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-NEXT:    v_cmp_ngt_f16_e32 vcc, v0, v1
+; VI-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; VI-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-LABEL: test_fmin_legacy_ule_v2f16_fast:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-NEXT:    v_min_f32_e32 v0, v0, v2
-; SI-NEXT:    v_min_f32_e32 v1, v1, v3
+; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; SI-NEXT:    v_min_legacy_f32_e32 v0, v2, v0
+; SI-NEXT:    v_min_legacy_f32_e32 v1, v3, v1
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_fmin_legacy_ule_v2f16_fast:
@@ -286,6 +290,8 @@ define <3 x half> @test_fmin_legacy_ule_v3f16_fast(<3 x half> %a, <3 x half> %b)
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX9-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX9-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX9-NEXT:    v_pk_max_f16 v0, v0, v0
 ; GFX9-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX9-NEXT:    v_pk_min_f16 v0, v0, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -293,39 +299,46 @@ define <3 x half> @test_fmin_legacy_ule_v3f16_fast(<3 x half> %a, <3 x half> %b)
 ; VI-LABEL: test_fmin_legacy_ule_v3f16_fast:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT:    v_min_f16_e32 v0, v0, v2
-; VI-NEXT:    v_min_f16_e32 v1, v1, v3
-; VI-NEXT:    v_or_b32_e32 v0, v0, v4
+; VI-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; VI-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; VI-NEXT:    v_cmp_ngt_f16_e32 vcc, v5, v4
+; VI-NEXT:    v_cndmask_b32_sdwa v4, v4, v5, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-NEXT:    v_cmp_ngt_f16_e32 vcc, v1, v3
+; VI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-NEXT:    v_cmp_ngt_f16_e32 vcc, v0, v2
+; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-LABEL: test_fmin_legacy_ule_v3f16_fast:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    v_cvt_f16_f32_e32 v5, v5
 ; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; SI-NEXT:    v_cvt_f16_f32_e32 v4, v4
+; SI-NEXT:    v_cvt_f16_f32_e32 v5, v5
 ; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
+; SI-NEXT:    v_cvt_f16_f32_e32 v4, v4
 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT:    v_cvt_f32_f16_e32 v5, v5
+; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; SI-NEXT:    v_cvt_f32_f16_e32 v4, v4
+; SI-NEXT:    v_cvt_f32_f16_e32 v5, v5
 ; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; SI-NEXT:    v_cvt_f32_f16_e32 v4, v4
 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-NEXT:    v_min_f32_e32 v0, v0, v3
-; SI-NEXT:    v_min_f32_e32 v1, v1, v4
-; SI-NEXT:    v_min_f32_e32 v2, v2, v5
+; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; SI-NEXT:    v_min_legacy_f32_e32 v0, v3, v0
+; SI-NEXT:    v_min_legacy_f32_e32 v1, v4, v1
+; SI-NEXT:    v_min_legacy_f32_e32 v2, v5, v2
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_fmin_legacy_ule_v3f16_fast:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_pk_max_f16 v3, v3, v3
+; GFX11-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX11-NEXT:    v_pk_max_f16 v0, v0, v0
 ; GFX11-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_pk_min_f16 v0, v0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX11-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %cmp = fcmp ule <3 x half> %a, %b
@@ -462,26 +475,26 @@ define <4 x half> @test_fmin_legacy_ule_v4f16_fast(<4 x half> %a, <4 x half> %b)
 ; SI-LABEL: test_fmin_legacy_ule_v4f16_fast:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    v_cvt_f16_f32_e32 v7, v7
 ; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
-; SI-NEXT:    v_cvt_f16_f32_e32 v6, v6
+; SI-NEXT:    v_cvt_f16_f32_e32 v7, v7
 ; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; SI-NEXT:    v_cvt_f16_f32_e32 v5, v5
+; SI-NEXT:    v_cvt_f16_f32_e32 v6, v6
 ; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; SI-NEXT:    v_cvt_f16_f32_e32 v4, v4
+; SI-NEXT:    v_cvt_f16_f32_e32 v5, v5
 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT:    v_cvt_f32_f16_e32 v7, v7
+; SI-NEXT:    v_cvt_f16_f32_e32 v4, v4
 ; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; SI-NEXT:    v_cvt_f32_f16_e32 v6, v6
+; SI-NEXT:    v_cvt_f32_f16_e32 v7, v7
 ; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; SI-NEXT:    v_cvt_f32_f16_e32 v5, v5
+; SI-NEXT:    v_cvt_f32_f16_e32 v6, v6
 ; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-NEXT:    v_cvt_f32_f16_e32 v4, v4
+; SI-NEXT:    v_cvt_f32_f16_e32 v5, v5
 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-NEXT:    v_min_f32_e32 v0, v0, v4
-; SI-NEXT:    v_min_f32_e32 v1, v1, v5
-; SI-NEXT:    v_min_f32_e32 v2, v2, v6
-; SI-NEXT:    v_min_f32_e32 v3, v3, v7
+; SI-NEXT:    v_cvt_f32_f16_e32 v4, v4
+; SI-NEXT:    v_min_legacy_f32_e32 v0, v4, v0
+; SI-NEXT:    v_min_legacy_f32_e32 v1, v5, v1
+; SI-NEXT:    v_min_legacy_f32_e32 v2, v6, v2
+; SI-NEXT:    v_min_legacy_f32_e32 v3, v7, v3
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_fmin_legacy_ule_v4f16_fast:
@@ -702,46 +715,46 @@ define <8 x half> @test_fmin_legacy_ule_v8f16_fast(<8 x half> %a, <8 x half> %b)
 ; SI-LABEL: test_fmin_legacy_ule_v8f16_fast:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    v_cvt_f16_f32_e32 v15, v15
 ; SI-NEXT:    v_cvt_f16_f32_e32 v7, v7
-; SI-NEXT:    v_cvt_f16_f32_e32 v14, v14
+; SI-NEXT:    v_cvt_f16_f32_e32 v15, v15
 ; SI-NEXT:    v_cvt_f16_f32_e32 v6, v6
-; SI-NEXT:    v_cvt_f16_f32_e32 v13, v13
+; SI-NEXT:    v_cvt_f16_f32_e32 v14, v14
 ; SI-NEXT:    v_cvt_f16_f32_e32 v5, v5
-; SI-NEXT:    v_cvt_f16_f32_e32 v12, v12
+; SI-NEXT:    v_cvt_f16_f32_e32 v13, v13
 ; SI-NEXT:    v_cvt_f16_f32_e32 v4, v4
-; SI-NEXT:    v_cvt_f16_f32_e32 v11, v11
+; SI-NEXT:    v_cvt_f16_f32_e32 v12, v12
 ; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
-; SI-NEXT:    v_cvt_f16_f32_e32 v10, v10
+; SI-NEXT:    v_cvt_f16_f32_e32 v11, v11
 ; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; SI-NEXT:    v_cvt_f16_f32_e32 v9, v9
+; SI-NEXT:    v_cvt_f16_f32_e32 v10, v10
 ; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; SI-NEXT:    v_cvt_f16_f32_e32 v8, v8
+; SI-NEXT:    v_cvt_f16_f32_e32 v9, v9
 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT:    v_cvt_f32_f16_e32 v15, v15
+; SI-NEXT:    v_cvt_f16_f32_e32 v8, v8
 ; SI-NEXT:    v_cvt_f32_f16_e32 v7, v7
-; SI-NEXT:    v_cvt_f32_f16_e32 v14, v14
+; SI-NEXT:    v_cvt_f32_f16_e32 v15, v15
 ; SI-NEXT:    v_cvt_f32_f16_e32 v6, v6
-; SI-NEXT:    v_cvt_f32_f16_e32 v13, v13
+; SI-NEXT:    v_cvt_f32_f16_e32 v14, v14
 ; SI-NEXT:    v_cvt_f32_f16_e32 v5, v5
-; SI-NEXT:    v_cvt_f32_f16_e32 v12, v12
+; SI-NEXT:    v_cvt_f32_f16_e32 v13, v13
 ; SI-NEXT:    v_cvt_f32_f16_e32 v4, v4
-; SI-NEXT:    v_cvt_f32_f16_e32 v11, v11
+; SI-NEXT:    v_cvt_f32_f16_e32 v12, v12
 ; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; SI-NEXT:    v_cvt_f32_f16_e32 v10, v10
+; SI-NEXT:    v_cvt_f32_f16_e32 v11, v11
 ; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; SI-NEXT:    v_cvt_f32_f16_e32 v9, v9
+; SI-NEXT:    v_cvt_f32_f16_e32 v10, v10
 ; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-NEXT:    v_cvt_f32_f16_e32 v8, v8
+; SI-NEXT:    v_cvt_f32_f16_e32 v9, v9
 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-NEXT:    v_min_f32_e32 v0, v0, v8
-; SI-NEXT:    v_min_f32_e32 v1, v1, v9
-; SI-NEXT:    v_min_f32_e32 v2, v2, v10
-; SI-NEXT:    v_min_f32_e32 v3, v3, v11
-; SI-NEXT:    v_min_f32_e32 v4, v4, v12
-; SI-NEXT:    v_min_f32_e32 v5, v5, v13
-; SI-NEXT:    v_min_f32_e32 v6, v6, v14
-; SI-NEXT:    v_min_f32_e32 v7, v7, v15
+; SI-NEXT:    v_cvt_f32_f16_e32 v8, v8
+; SI-NEXT:    v_min_legacy_f32_e32 v0, v8, v0
+; SI-NEXT:    v_min_legacy_f32_e32 v1, v9, v1
+; SI-NEXT:    v_min_legacy_f32_e32 v2, v10, v2
+; SI-NEXT:    v_min_legacy_f32_e32 v3, v11, v3
+; SI-NEXT:    v_min_legacy_f32_e32 v4, v12, v4
+; SI-NEXT:    v_min_legacy_f32_e32 v5, v13, v5
+; SI-NEXT:    v_min_legacy_f32_e32 v6, v14, v6
+; SI-NEXT:    v_min_legacy_f32_e32 v7, v15, v7
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_fmin_legacy_ule_v8f16_fast:
diff --git a/llvm/test/CodeGen/AMDGPU/reduction.ll b/llvm/test/CodeGen/AMDGPU/reduction.ll
index 291eccd405b8a..0dd2b0ba5550b 100644
--- a/llvm/test/CodeGen/AMDGPU/reduction.ll
+++ b/llvm/test/CodeGen/AMDGPU/reduction.ll
@@ -619,8 +619,6 @@ define half @reduction_fast_max_pattern_v4f16(<4 x half> %vec4) {
 ; GFX9-LABEL: reduction_fast_max_pattern_v4f16:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX9-NEXT:    v_pk_max_f16 v0, v0, v0
 ; GFX9-NEXT:    v_pk_max_f16 v0, v0, v1
 ; GFX9-NEXT:    v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -628,21 +626,17 @@ define half @reduction_fast_max_pattern_v4f16(<4 x half> %vec4) {
 ; VI-LABEL: reduction_fast_max_pattern_v4f16:
 ; VI:       ; %bb.0: ; %entry
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT:    v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT:    v_max_f16_e32 v1, v1, v1
-; VI-NEXT:    v_max_f16_e32 v0, v0, v0
-; VI-NEXT:    v_max_f16_e32 v2, v3, v2
+; VI-NEXT:    v_max_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; VI-NEXT:    v_max_f16_e32 v0, v0, v1
 ; VI-NEXT:    v_max_f16_e32 v0, v0, v2
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
   %rdx.minmax.cmp = fcmp nnan nsz ogt <4 x half> %vec4, %rdx.shuf
-  %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x half> %vec4, <4 x half> %rdx.shuf
+  %rdx.minmax.select = select nnan nsz <4 x i1> %rdx.minmax.cmp, <4 x half> %vec4, <4 x half> %rdx.shuf
   %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
   %rdx.minmax.cmp2 = fcmp nnan nsz ogt <4 x half> %rdx.minmax.select, %rdx.shuf1
-  %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x half> %rdx.minmax.select, <4 x half> %rdx.shuf1
+  %rdx.minmax.select3 = select nnan nsz <4 x i1> %rdx.minmax.cmp2, <4 x half> %rdx.minmax.select, <4 x half> %rdx.shuf1
   %res = extractelement <4 x half> %rdx.minmax.select3, i32 0
   ret half %res
 }
@@ -653,8 +647,6 @@ define half @reduction_fast_min_pattern_v4f16(<4 x half> %vec4) {
 ; GFX9-LABEL: reduction_fast_min_pattern_v4f16:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX9-NEXT:    v_pk_max_f16 v0, v0, v0
 ; GFX9-NEXT:    v_pk_min_f16 v0, v0, v1
 ; GFX9-NEXT:    v_min_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -662,21 +654,17 @@ define half @reduction_fast_min_pattern_v4f16(<4 x half> %vec4) {
 ; VI-LABEL: reduction_fast_min_pattern_v4f16:
 ; VI:       ; %bb.0: ; %entry
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT:    v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT:    v_max_f16_e32 v1, v1, v1
-; VI-NEXT:    v_max_f16_e32 v0, v0, v0
-; VI-NEXT:    v_min_f16_e32 v2, v3, v2
+; VI-NEXT:    v_min_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; VI-NEXT:    v_min_f16_e32 v0, v0, v1
 ; VI-NEXT:    v_min_f16_e32 v0, v0, v2
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
   %rdx.minmax.cmp = fcmp nnan nsz olt <4 x half> %vec4, %rdx.shuf
-  %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x half> %vec4, <4 x half> %rdx.shuf
+  %rdx.minmax.select = select nnan nsz <4 x i1> %rdx.minmax.cmp, <4 x half> %vec4, <4 x half> %rdx.shuf
   %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
   %rdx.minmax.cmp2 = fcmp nnan nsz olt <4 x half> %rdx.minmax.select, %rdx.shuf1
-  %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x half> %rdx.minmax.select, <4 x half> %rdx.shuf1
+  %rdx.minmax.select3 = select nnan nsz <4 x i1> %rdx.minmax.cmp2, <4 x half> %rdx.minmax.select, <4 x half> %rdx.shuf1
   %res = extractelement <4 x half> %rdx.minmax.select3, i32 0
   ret half %res
 }
diff --git a/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll
index 09f7e7a926376..9aae8ec448261 100644
--- a/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll
@@ -355,15 +355,17 @@ define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag(<2 x float> %a, <
 ; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_min_f32_e32 v0, v0, v2
-; GFX7-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX7-NEXT:    v_min_legacy_f32_e32 v0, v2, v0
+; GFX7-NEXT:    v_min_legacy_f32_e32 v1, v3, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_f32_e32 v0, v0, v2
-; GFX9-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX9-NEXT:    v_cmp_ngt_f32_e32 vcc, v0, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX9-NEXT:    v_cmp_ngt_f32_e32 vcc, v1, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
@@ -373,7 +375,12 @@ define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag(<2 x float> %a, <
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3
+; GFX12-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, v0, v2
+; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX12-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, v1, v3
+; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc_lo
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %cmp = fcmp ule <2 x float> %a, %b
   %val = select nnan nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
@@ -492,15 +499,17 @@ define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag(<2 x float> %a, <
 ; GFX7-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_max_f32_e32 v0, v0, v2
-; GFX7-NEXT:    v_max_f32_e32 v1, v1, v3
+; GFX7-NEXT:    v_max_legacy_f32_e32 v0, v2, v0
+; GFX7-NEXT:    v_max_legacy_f32_e32 v1, v3, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f32_e32 v0, v0, v2
-; GFX9-NEXT:    v_max_f32_e32 v1, v1, v3
+; GFX9-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX9-NEXT:    v_cmp_nlt_f32_e32 vcc, v1, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag:
@@ -510,7 +519,12 @@ define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag(<2 x float> %a, <
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3
+; GFX12-NEXT:    v_cmp_nlt_f32_e32 vcc_lo, v0, v2
+; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX12-NEXT:    v_cmp_nlt_f32_e32 vcc_lo, v1, v3
+; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc_lo
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %cmp = fcmp uge <2 x float> %a, %b
   %val = select nnan nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
@@ -659,10 +673,10 @@ define half @v_test_fmin_legacy_ule_f16_nnan_nsz_flag(half %a, half %b) {
 ; GFX7-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -838,10 +852,10 @@ define half @v_test_fmax_legacy_uge_f16_nnan_nsz_flag(half %a, half %b) {
 ; GFX7-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1086,16 +1100,16 @@ define <2 x half> @v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag(<2 x half> %a, <2
 ; GFX7-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT:    v_min_f32_e32 v0, v0, v2
-; GFX7-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT:    v_min_legacy_f32_e32 v0, v2, v0
+; GFX7-NEXT:    v_min_legacy_f32_e32 v1, v3, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag:
@@ -1329,16 +1343,16 @@ define <2 x half> @v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag(<2 x half> %a, <2
 ; GFX7-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT:    v_max_f32_e32 v0, v0, v2
-; GFX7-NEXT:    v_max_f32_e32 v1, v1, v3
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT:    v_max_legacy_f32_e32 v0, v2, v0
+; GFX7-NEXT:    v_max_legacy_f32_e32 v1, v3, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag:
@@ -1659,26 +1673,26 @@ define <4 x half> @v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag(<4 x half> %a, <4
 ; GFX7-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v6, v6
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT:    v_min_f32_e32 v0, v0, v4
-; GFX7-NEXT:    v_min_f32_e32 v1, v1, v5
-; GFX7-NEXT:    v_min_f32_e32 v2, v2, v6
-; GFX7-NEXT:    v_min_f32_e32 v3, v3, v7
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v7, v7
+; GFX7-NEXT:    v_min_legacy_f32_e32 v0, v4, v0
+; GFX7-NEXT:    v_min_legacy_f32_e32 v1, v5, v1
+; GFX7-NEXT:    v_min_legacy_f32_e32 v2, v6, v2
+; GFX7-NEXT:    v_min_legacy_f32_e32 v3, v7, v3
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag:
@@ -2001,26 +2015,26 @@ define <4 x half> @v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag(<4 x half> %a, <4
 ; GFX7-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v6, v6
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT:    v_max_f32_e32 v0, v0, v4
-; GFX7-NEXT:    v_max_f32_e32 v1, v1, v5
-; GFX7-NEXT:    v_max_f32_e32 v2, v2, v6
-; GFX7-NEXT:    v_max_f32_e32 v3, v3, v7
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v7, v7
+; GFX7-NEXT:    v_max_legacy_f32_e32 v0, v4, v0
+; GFX7-NEXT:    v_max_legacy_f32_e32 v1, v5, v1
+; GFX7-NEXT:    v_max_legacy_f32_e32 v2, v6, v2
+; GFX7-NEXT:    v_max_legacy_f32_e32 v3, v7, v3
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag:
diff --git a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
index 52fe5ce1a8a5f..b088606cb5225 100644
--- a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
+++ b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
@@ -214,7 +214,7 @@ define half @fp16_vminnm_NNNo(half %a) {
 ; CHECK-NEXT:    .short 0x5040 @ half 34
 entry:
   %cmp1 = fcmp olt half %a, 12.
-  %cond1 = select i1 %cmp1, half %a, half 12.
+  %cond1 = select nnan nsz i1 %cmp1, half %a, half 12.
   %cmp2 = fcmp olt half 34., %cond1
   %cond2 = select i1 %cmp2, half 34., half %cond1
   ret half %cond2
@@ -242,7 +242,7 @@ entry:
   %cmp1 = fcmp ogt half %a, 56.
   %cond1 = select i1 %cmp1, half 56., half %a
   %cmp2 = fcmp ogt half 78., %cond1
-  %cond2 = select i1 %cmp2, half %cond1, half 78.
+  %cond2 = select nnan nsz i1 %cmp2, half %cond1, half 78.
   ret half %cond2
 }
 
@@ -264,7 +264,7 @@ define half @fp16_vminnm_NNNu(half %b) {
 ; CHECK-NEXT:    .short 0x5040 @ half 34
 entry:
   %cmp1 = fcmp ult half 12., %b
-  %cond1 = select i1 %cmp1, half 12., half %b
+  %cond1 = select nnan nsz i1 %cmp1, half 12., half %b
   %cmp2 = fcmp ult half %cond1, 34.
   %cond2 = select i1 %cmp2, half %cond1, half 34.
   ret half %cond2
@@ -291,7 +291,7 @@ define half @fp16_vminnm_NNNule(half %b) {
 
 entry:
   %cmp1 = fcmp ule half 34., %b
-  %cond1 = select i1 %cmp1, half 34., half %b
+  %cond1 = select nnan nsz i1 %cmp1, half 34., half %b
   %cmp2 = fcmp ule half %cond1, 56.
   %cond2 = select i1 %cmp2, half %cond1, half 56.
   ret half %cond2
@@ -321,7 +321,7 @@ entry:
   %cmp1 = fcmp ugt half 56., %b
   %cond1 = select i1 %cmp1, half %b, half 56.
   %cmp2 = fcmp ugt half %cond1, 78.
-  %cond2 = select i1 %cmp2, half 78., half %cond1
+  %cond2 = select nnan nsz i1 %cmp2, half 78., half %cond1
   ret half %cond2
 }
 
@@ -343,7 +343,7 @@ define half @fp16_vmaxnm_NNNo(half %a) {
 ; CHECK-NEXT:    .short 0x5040 @ half 34
 entry:
   %cmp1 = fcmp ogt half %a, 12.
-  %cond1 = select i1 %cmp1, half %a, half 12.
+  %cond1 = select nnan nsz i1 %cmp1, half %a, half 12.
   %cmp2 = fcmp ogt half 34., %cond1
   %cond2 = select i1 %cmp2, half 34., half %cond1
   ret half %cond2
@@ -369,7 +369,7 @@ define half @fp16_vmaxnm_NNNoge(half %a) {
 ; CHECK-NEXT:    .short 0x5300 @ half 56
 entry:
   %cmp1 = fcmp oge half %a, 34.
-  %cond1 = select i1 %cmp1, half %a, half 34.
+  %cond1 = select nnan nsz i1 %cmp1, half %a, half 34.
   %cmp2 = fcmp oge half 56., %cond1
   %cond2 = select i1 %cmp2, half 56., half %cond1
   ret half %cond2
@@ -397,7 +397,7 @@ entry:
   %cmp1 = fcmp olt half %a, 56.
   %cond1 = select i1 %cmp1, half 56., half %a
   %cmp2 = fcmp olt half 78., %cond1
-  %cond2 = select i1 %cmp2, half %cond1, half 78.
+  %cond2 = select nnan nsz i1 %cmp2, half %cond1, half 78.
   ret half %cond2
 }
 
@@ -423,7 +423,7 @@ entry:
   %cmp1 = fcmp ole half %a, 78.
   %cond1 = select i1 %cmp1, half 78., half %a
   %cmp2 = fcmp ole half 90., %cond1
-  %cond2 = select i1 %cmp2, half %cond1, half 90.
+  %cond2 = select nnan nsz i1 %cmp2, half %cond1, half 90.
   ret half %cond2
 }
 
@@ -445,7 +445,7 @@ define half @fp16_vmaxnm_NNNu(half %b) {
 ; CHECK-NEXT:    .short 0x5040 @ half 34
 entry:
   %cmp1 = fcmp ugt half 12., %b
-  %cond1 = select i1 %cmp1, half 12., half %b
+  %cond1 = select nnan nsz i1 %cmp1, half 12., half %b
   %cmp2 = fcmp ugt half %cond1, 34.
   %cond2 = select i1 %cmp2, half %cond1, half 34.
   ret half %cond2
@@ -471,7 +471,7 @@ define half @fp16_vmaxnm_NNNuge(half %b) {
 ; CHECK-NEXT:    .short 0x5300 @ half 56
 entry:
   %cmp1 = fcmp uge half 34., %b
-  %cond1 = select i1 %cmp1, half 34., half %b
+  %cond1 = select nnan nsz i1 %cmp1, half 34., half %b
   %cmp2 = fcmp uge half %cond1, 56.
   %cond2 = select i1 %cmp2, half %cond1, half 56.
   ret half %cond2
@@ -494,7 +494,7 @@ define half @fp16_vminmaxnm_neg0(half %a) {
 ; CHECK-NEXT:    .short 0x8000 @ half -0
 entry:
   %cmp1 = fcmp olt half %a, -0.
-  %cond1 = select i1 %cmp1, half %a, half -0.
+  %cond1 = select nnan nsz i1 %cmp1, half %a, half -0.
   %cmp2 = fcmp ugt half %cond1, -0.
   %cond2 = select i1 %cmp2, half %cond1, half -0.
   ret half %cond2
@@ -519,7 +519,7 @@ entry:
   %cmp1 = fcmp nsz ole half 0., %a
   %cond1 = select nsz i1 %cmp1, half 0., half %a
   %cmp2 = fcmp nsz uge half 0., %cond1
-  %cond2 = select nsz i1 %cmp2, half 0., half %cond1
+  %cond2 = select nnan nsz i1 %cmp2, half 0., half %cond1
   ret half %cond2
 }
 
@@ -540,7 +540,7 @@ define half @fp16_vminmaxnm_e_neg0(half %a) {
 ; CHECK-NEXT:    .short 0x8000 @ half -0
 entry:
   %cmp1 = fcmp nsz ule half -0., %a
-  %cond1 = select nsz i1 %cmp1, half -0., half %a
+  %cond1 = select nnan nsz i1 %cmp1, half -0., half %a
   %cmp2 = fcmp nsz oge half -0., %cond1
   %cond2 = select i1 %cmp2, half -0., half %cond1
   ret half %cond2
diff --git a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-vector.ll b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-vector.ll
index 0b41c738080a6..eb401ae051305 100644
--- a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-vector.ll
+++ b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-vector.ll
@@ -10,7 +10,7 @@ define <4 x half> @test1(<4 x half> %A, <4 x half> %B) {
 ; CHECK:         vmaxnm.f16 d0, d0, d1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast ogt <4 x half> %A, %B
-  %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+  %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
   ret <4 x half> %tmp4
 }
 
@@ -19,7 +19,7 @@ define <4 x half> @test2(<4 x half> %A, <4 x half> %B) {
 ; CHECK:         vminnm.f16 d0, d0, d1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast ogt <4 x half> %A, %B
-  %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+  %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
   ret <4 x half> %tmp4
 }
 
@@ -28,7 +28,7 @@ define <4 x half> @test3(<4 x half> %A, <4 x half> %B) {
 ; CHECK:         vminnm.f16 d0, d0, d1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast oge <4 x half> %A, %B
-  %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+  %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
   ret <4 x half> %tmp4
 }
 
@@ -37,7 +37,7 @@ define <4 x half> @test4(<4 x half> %A, <4 x half> %B) {
 ; CHECK:         vmaxnm.f16 d0, d0, d1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast oge <4 x half> %A, %B
-  %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+  %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
   ret <4 x half> %tmp4
 }
 
@@ -46,7 +46,7 @@ define <4 x half> @test5(<4 x half> %A, <4 x half> %B) {
 ; CHECK:         vminnm.f16 d0, d0, d1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast olt <4 x half> %A, %B
-  %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+  %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
   ret <4 x half> %tmp4
 }
 
@@ -55,7 +55,7 @@ define <4 x half> @test6(<4 x half> %A, <4 x half> %B) {
 ; CHECK:         vmaxnm.f16 d0, d0, d1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast olt <4 x half> %A, %B
-  %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+  %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
   ret <4 x half> %tmp4
 }
 
@@ -64,7 +64,7 @@ define <4 x half> @test7(<4 x half> %A, <4 x half> %B) {
 ; CHECK:         vminnm.f16 d0, d0, d1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast ole <4 x half> %A, %B
-  %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+  %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
   ret <4 x half> %tmp4
 }
 
@@ -73,7 +73,7 @@ define <4 x half> @test8(<4 x half> %A, <4 x half> %B) {
 ; CHECK:         vmaxnm.f16 d0, d0, d1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast ole <4 x half> %A, %B
-  %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+  %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
   ret <4 x half> %tmp4
 }
 
@@ -84,7 +84,7 @@ define <4 x half> @test11(<4 x half> %A, <4 x half> %B) {
 ; CHECK:         vmaxnm.f16 d0, d0, d1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast ugt <4 x half> %A, %B
-  %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+  %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
   ret <4 x half> %tmp4
 }
 
@@ -93,7 +93,7 @@ define <4 x half> @test12(<4 x half> %A, <4 x half> %B) {
 ; CHECK:         vminnm.f16 d0, d0, d1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast ugt <4 x half> %A, %B
-  %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+  %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
   ret <4 x half> %tmp4
 }
 
@@ -102,7 +102,7 @@ define <4 x half> @test13(<4 x half> %A, <4 x half> %B) {
 ; CHECK:         vminnm.f16 d0, d0, d1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast uge <4 x half> %A, %B
-  %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+  %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
   ret <4 x half> %tmp4
 }
 
@@ -111,7 +111,7 @@ define <4 x half> @test14(<4 x half> %A, <4 x half> %B) {
 ; CHECK:         vmaxnm.f16 d0, d0, d1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast uge <4 x half> %A, %B
-  %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+  %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
   ret <4 x half> %tmp4
 }
 
@@ -120,7 +120,7 @@ define <4 x half> @test15(<4 x half> %A, <4 x half> %B) {
 ; CHECK:         vminnm.f16 d0, d0, d1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast ult <4 x half> %A, %B
-  %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+  %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
   ret <4 x half> %tmp4
 }
 
@@ -129,7 +129,7 @@ define <4 x half> @test16(<4 x half> %A, <4 x half> %B) {
 ; CHECK:         vmaxnm.f16 d0, d0, d1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast ult <4 x half> %A, %B
-  %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+  %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
   ret <4 x half> %tmp4
 }
 
@@ -138,7 +138,7 @@ define <4 x half> @test17(<4 x half> %A, <4 x half> %B) {
 ; CHECK:         vminnm.f16 d0, d0, d1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast ule <4 x half> %A, %B
-  %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+  %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
   ret <4 x half> %tmp4
 }
 
@@ -147,7 +147,7 @@ define <4 x half> @test18(<4 x half> %A, <4 x half> %B) {
 ; CHECK:         vmaxnm.f16 d0, d0, d1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast ule <4 x half> %A, %B
-  %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+  %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
   ret <4 x half> %tmp4
 }
 
@@ -160,7 +160,7 @@ define <8 x half> @test201(<8 x half> %A, <8 x half> %B) {
 ; CHECK:         vmaxnm.f16 q0, q0, q1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast ogt <8 x half> %A, %B
-  %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
+  %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
   ret <8 x half> %tmp4
 }
 
@@ -169,7 +169,7 @@ define <8 x half> @test202(<8 x half> %A, <8 x half> %B) {
 ; CHECK:         vminnm.f16 q0, q0, q1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast ogt <8 x half> %A, %B
-  %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
+  %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
   ret <8 x half> %tmp4
 }
 
@@ -178,7 +178,7 @@ define <8 x half> @test203(<8 x half> %A, <8 x half> %B) {
 ; CHECK:         vmaxnm.f16 q0, q0, q1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast oge <8 x half> %A, %B
-  %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
+  %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
   ret <8 x half> %tmp4
 }
 
@@ -187,7 +187,7 @@ define <8 x half> @test204(<8 x half> %A, <8 x half> %B) {
 ; CHECK:         vminnm.f16 q0, q0, q1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast oge <8 x half> %A, %B
-  %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
+  %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
   ret <8 x half> %tmp4
 }
 
@@ -196,7 +196,7 @@ define <8 x half> @test205(<8 x half> %A, <8 x half> %B) {
 ; CHECK:         vminnm.f16 q0, q0, q1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast olt <8 x half> %A, %B
-  %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
+  %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
   ret <8 x half> %tmp4
 }
 
@@ -205,7 +205,7 @@ define <8 x half> @test206(<8 x half> %A, <8 x half> %B) {
 ; CHECK:         vmaxnm.f16 q0, q0, q1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast olt <8 x half> %A, %B
-  %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
+  %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
   ret <8 x half> %tmp4
 }
 
@@ -214,7 +214,7 @@ define <8 x half> @test207(<8 x half> %A, <8 x half> %B) {
 ; CHECK:         vminnm.f16 q0, q0, q1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast ole <8 x half> %A, %B
-  %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
+  %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
   ret <8 x half> %tmp4
 }
 
@@ -223,7 +223,7 @@ define <8 x half> @test208(<8 x half> %A, <8 x half> %B) {
 ; CHECK:         vmaxnm.f16 q0, q0, q1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast ole <8 x half> %A, %B
-  %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
+  %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
   ret <8 x half> %tmp4
 }
 
@@ -234,7 +234,7 @@ define <8 x half> @test209(<8 x half> %A, <8 x half> %B) {
 ; CHECK:         vmaxnm.f16 q0, q0, q1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast ugt <8 x half> %A, %B
-  %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
+  %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
   ret <8 x half> %tmp4
 }
 
@@ -243,7 +243,7 @@ define <8 x half> @test210(<8 x half> %A, <8 x half> %B) {
 ; CHECK:         vminnm.f16 q0, q0, q1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast ugt <8 x half> %A, %B
-  %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
+  %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
   ret <8 x half> %tmp4
 }
 
@@ -252,7 +252,7 @@ define <8 x half> @test211(<8 x half> %A, <8 x half> %B) {
 ; CHECK:         vmaxnm.f16 q0, q0, q1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast uge <8 x half> %A, %B
-  %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
+  %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
   ret <8 x half> %tmp4
 }
 
@@ -261,7 +261,7 @@ define <8 x half> @test214(<8 x half> %A, <8 x half> %B) {
 ; CHECK:         vminnm.f16 q0, q0, q1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast uge <8 x half> %A, %B
-  %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
+  %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
   ret <8 x half> %tmp4
 }
 
@@ -270,7 +270,7 @@ define <8 x half> @test215(<8 x half> %A, <8 x half> %B) {
 ; CHECK:         vminnm.f16 q0, q0, q1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast ult <8 x half> %A, %B
-  %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
+  %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
   ret <8 x half> %tmp4
 }
 
@@ -279,7 +279,7 @@ define <8 x half> @test216(<8 x half> %A, <8 x half> %B) {
 ; CHECK:         vmaxnm.f16 q0, q0, q1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast ult <8 x half> %A, %B
-  %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
+  %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
   ret <8 x half> %tmp4
 }
 
@@ -288,7 +288,7 @@ define <8 x half> @test217(<8 x half> %A, <8 x half> %B) {
 ; CHECK:         vminnm.f16 q0, q0, q1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast ule <8 x half> %A, %B
-  %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
+  %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
   ret <8 x half> %tmp4
 }
 
@@ -297,6 +297,6 @@ define <8 x half> @test218(<8 x half> %A, <8 x half> %B) {
 ; CHECK:         vmaxnm.f16 q0, q0, q1
 ; CHECK-NEXT:    bx lr
   %tmp3 = fcmp fast ule <8 x half> %A, %B
-  %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
+  %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
   ret <8 x half> %tmp4
 }
diff --git a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
index 6706d25ae01d2..65c84619914e2 100644
--- a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
@@ -1387,7 +1387,7 @@ define void @pr65820(ptr %y, <4 x float> %splat) {
 entry:
   %broadcast.splat = shufflevector <4 x float> %splat, <4 x float> zeroinitializer, <4 x i32> zeroinitializer
   %0 = fcmp ogt <4 x float> %broadcast.splat, zeroinitializer
-  %1 = select <4 x i1> %0, <4 x float> %broadcast.splat, <4 x float> zeroinitializer
+  %1 = select nnan nsz <4 x i1> %0, <4 x float> %broadcast.splat, <4 x float> zeroinitializer
   store <4 x float> %1, ptr %y, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll b/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
index 5577ab49bb830..ad411c0a261a5 100644
--- a/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
+++ b/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
@@ -175,7 +175,7 @@ define float @fp-armv8_vminnm_NNNo(float %a) {
 ; CHECK: vminnm.f32
 ; CHECK-NOT: vminnm.f32
   %cmp1 = fcmp olt float %a, 12.
-  %cond1 = select nsz i1 %cmp1, float %a, float 12.
+  %cond1 = select nnan nsz i1 %cmp1, float %a, float 12.
   %cmp2 = fcmp olt float 34., %cond1
   %cond2 = select i1 %cmp2, float 34., float %cond1
   ret float %cond2
@@ -186,7 +186,7 @@ define double @fp-armv8_vminnm_NNNole(double %a) {
 ; CHECK: vminnm.f64
 ; CHECK-NOT: vminnm.f64
   %cmp1 = fcmp ole double %a, 34.
-  %cond1 = select nsz i1 %cmp1, double %a, double 34.
+  %cond1 = select nnan nsz i1 %cmp1, double %a, double 34.
   %cmp2 = fcmp ole double 56., %cond1
   %cond2 = select i1 %cmp2, double 56., double %cond1
   ret double %cond2
@@ -199,7 +199,7 @@ define float @fp-armv8_vminnm_NNNo_rev(float %a) {
   %cmp1 = fcmp ogt float %a, 56.
   %cond1 = select nsz i1 %cmp1, float 56., float %a
   %cmp2 = fcmp ogt float 78., %cond1
-  %cond2 = select nsz i1 %cmp2, float %cond1, float 78.
+  %cond2 = select nnan nsz i1 %cmp2, float %cond1, float 78.
   ret float %cond2
 }
 
@@ -210,7 +210,7 @@ define double @fp-armv8_vminnm_NNNoge_rev(double %a) {
   %cmp1 = fcmp oge double %a, 78.
   %cond1 = select nsz i1 %cmp1, double 78., double %a
   %cmp2 = fcmp oge double 90., %cond1
-  %cond2 = select nsz i1 %cmp2, double %cond1, double 90.
+  %cond2 = select nnan nsz i1 %cmp2, double %cond1, double 90.
   ret double %cond2
 }
 
@@ -221,7 +221,7 @@ define float @fp-armv8_vminnm_NNNu(float %b) {
   %cmp1 = fcmp ult float 12., %b
   %cond1 = select nsz i1 %cmp1, float 12., float %b
   %cmp2 = fcmp ult float %cond1, 34.
-  %cond2 = select i1 %cmp2, float %cond1, float 34.
+  %cond2 = select nnan nsz i1 %cmp2, float %cond1, float 34.
   ret float %cond2
 }
 
@@ -230,7 +230,7 @@ define float @fp-armv8_vminnm_NNNule(float %b) {
 ; CHECK: vminnm.f32
 ; CHECK-NOT: vminnm.f32
   %cmp1 = fcmp ule float 34., %b
-  %cond1 = select nsz i1 %cmp1, float 34., float %b
+  %cond1 = select nnan nsz i1 %cmp1, float 34., float %b
   %cmp2 = fcmp ule float %cond1, 56.
   %cond2 = select i1 %cmp2, float %cond1, float 56.
   ret float %cond2
@@ -243,7 +243,7 @@ define float @fp-armv8_vminnm_NNNu_rev(float %b) {
   %cmp1 = fcmp ugt float 56., %b
   %cond1 = select nsz i1 %cmp1, float %b, float 56.
   %cmp2 = fcmp ugt float %cond1, 78.
-  %cond2 = select nsz i1 %cmp2, float 78., float %cond1
+  %cond2 = select nnan nsz i1 %cmp2, float 78., float %cond1
   ret float %cond2
 }
 
@@ -254,7 +254,7 @@ define double @fp-armv8_vminnm_NNNuge_rev(double %b) {
   %cmp1 = fcmp uge double 78., %b
   %cond1 = select nsz i1 %cmp1, double %b, double 78.
   %cmp2 = fcmp uge double %cond1, 90.
-  %cond2 = select nsz i1 %cmp2, double 90., double %cond1
+  %cond2 = select nnan nsz i1 %cmp2, double 90., double %cond1
   ret double %cond2
 }
 
@@ -265,7 +265,7 @@ define float @fp-armv8_vmaxnm_NNNo(float %a) {
   %cmp1 = fcmp ogt float %a, 12.
   %cond1 = select nsz i1 %cmp1, float %a, float 12.
   %cmp2 = fcmp ogt float 34., %cond1
-  %cond2 = select i1 %cmp2, float 34., float %cond1
+  %cond2 = select nnan nsz i1 %cmp2, float 34., float %cond1
   ret float %cond2
 }
 
@@ -274,7 +274,7 @@ define float @fp-armv8_vmaxnm_NNNoge(float %a) {
 ; CHECK: vmaxnm.f32
 ; CHECK-NOT: vmaxnm.f32
   %cmp1 = fcmp oge float %a, 34.
-  %cond1 = select nsz i1 %cmp1, float %a, float 34.
+  %cond1 = select nnan nsz i1 %cmp1, float %a, float 34.
   %cmp2 = fcmp oge float 56., %cond1
   %cond2 = select i1 %cmp2, float 56., float %cond1
   ret float %cond2
@@ -287,7 +287,7 @@ define float @fp-armv8_vmaxnm_NNNo_rev(float %a) {
   %cmp1 = fcmp olt float %a, 56.
   %cond1 = select nsz i1 %cmp1, float 56., float %a
   %cmp2 = fcmp olt float 78., %cond1
-  %cond2 = select nsz i1 %cmp2, float %cond1, float 78.
+  %cond2 = select nnan nsz i1 %cmp2, float %cond1, float 78.
   ret float %cond2
 }
 
@@ -298,7 +298,7 @@ define float @fp-armv8_vmaxnm_NNNole_rev(float %a) {
   %cmp1 = fcmp ole float %a, 78.
   %cond1 = select nsz i1 %cmp1, float 78., float %a
   %cmp2 = fcmp ole float 90., %cond1
-  %cond2 = select nsz i1 %cmp2, float %cond1, float 90.
+  %cond2 = select nnan nsz i1 %cmp2, float %cond1, float 90.
   ret float %cond2
 }
 
@@ -307,7 +307,7 @@ define float @fp-armv8_vmaxnm_NNNu(float %b) {
 ; CHECK: vmaxnm.f32
 ; CHECK-NOT: vmaxnm.f32
   %cmp1 = fcmp ugt float 12., %b
-  %cond1 = select nsz i1 %cmp1, float 12., float %b
+  %cond1 = select nnan nsz i1 %cmp1, float 12., float %b
   %cmp2 = fcmp ugt float %cond1, 34.
   %cond2 = select i1 %cmp2, float %cond1, float 34.
   ret float %cond2
@@ -318,7 +318,7 @@ define float @fp-armv8_vmaxnm_NNNuge(float %b) {
 ; CHECK: vmaxnm.f32
 ; CHECK-NOT: vmaxnm.f32
   %cmp1 = fcmp uge float 34., %b
-  %cond1 = select nsz i1 %cmp1, float 34., float %b
+  %cond1 = select nnan nsz i1 %cmp1, float 34., float %b
   %cmp2 = fcmp uge float %cond1, 56.
   %cond2 = select i1 %cmp2, float %cond1, float 56.
   ret float %cond2
@@ -331,7 +331,7 @@ define float @fp-armv8_vmaxnm_NNNu_rev(float %b) {
   %cmp1 = fcmp ult float 56., %b
   %cond1 = select nsz i1 %cmp1, float %b, float 56.
   %cmp2 = fcmp ult float %cond1, 78.
-  %cond2 = select nsz i1 %cmp2, float 78., float %cond1
+  %cond2 = select nnan nsz i1 %cmp2, float 78., float %cond1
   ret float %cond2
 }
 
@@ -342,7 +342,7 @@ define double @fp-armv8_vmaxnm_NNNule_rev( double %b) {
   %cmp1 = fcmp ule double 78., %b
   %cond1 = select nsz i1 %cmp1, double %b, double 78.
   %cmp2 = fcmp ule double %cond1, 90.
-  %cond2 = select nsz i1 %cmp2, double 90., double %cond1
+  %cond2 = select nnan nsz i1 %cmp2, double 90., double %cond1
   ret double %cond2
 }
 
@@ -353,7 +353,7 @@ define float @fp-armv8_vminmaxnm_0(float %a) {
   %cmp1 = fcmp ult float %a, 0.
   %cond1 = select nsz i1 %cmp1, float %a, float 0.
   %cmp2 = fcmp ogt float %cond1, 0.
-  %cond2 = select nsz i1 %cmp2, float %cond1, float 0.
+  %cond2 = select nnan nsz i1 %cmp2, float %cond1, float 0.
   ret float %cond2
 }
 
@@ -362,7 +362,7 @@ define float @fp-armv8_vminmaxnm_neg0(float %a) {
 ; CHECK: vminnm.f32
 ; CHECK-NOT: vmaxnm.f32
   %cmp1 = fcmp olt float %a, -0.
-  %cond1 = select nsz i1 %cmp1, float %a, float -0.
+  %cond1 = select nnan nsz i1 %cmp1, float %a, float -0.
   %cmp2 = fcmp ugt float %cond1, -0.
   %cond2 = select i1 %cmp2, float %cond1, float -0.
   ret float %cond2
@@ -375,7 +375,7 @@ define float @fp-armv8_vminmaxnm_e_0(float %a) {
   %cmp1 = fcmp nsz ole float 0., %a
   %cond1 = select nsz i1 %cmp1, float 0., float %a
   %cmp2 = fcmp nsz uge float 0., %cond1
-  %cond2 = select nsz i1 %cmp2, float 0., float %cond1
+  %cond2 = select nnan nsz i1 %cmp2, float 0., float %cond1
   ret float %cond2
 }
 
@@ -384,7 +384,7 @@ define float @fp-armv8_vminmaxnm_e_neg0(float %a) {
 ; CHECK: vminnm.f32
 ; CHECK-NOT: vmaxnm.f32
   %cmp1 = fcmp nsz ule float -0., %a
-  %cond1 = select nsz i1 %cmp1, float -0., float %a
+  %cond1 = select nnan nsz i1 %cmp1, float -0., float %a
   %cmp2 = fcmp nsz oge float -0., %cond1
   %cond2 = select i1 %cmp2, float -0., float %cond1
   ret float %cond2
diff --git a/llvm/test/CodeGen/PowerPC/vec-min-max.ll b/llvm/test/CodeGen/PowerPC/vec-min-max.ll
index 8124fde2667dd..c816e79f0c26f 100644
--- a/llvm/test/CodeGen/PowerPC/vec-min-max.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-min-max.ll
@@ -79,7 +79,7 @@ define <4 x float> @getsmaxf32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-NEXT:    blr
 entry:
   %0 = fcmp nnan nsz oge <4 x float> %a, %b
-  %1 = select nsz <4 x i1> %0, <4 x float> %a, <4 x float> %b
+  %1 = select nnan nsz <4 x i1> %0, <4 x float> %a, <4 x float> %b
   ret <4 x float> %1
 }
 
@@ -90,7 +90,7 @@ define <2 x double> @getsmaxf64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-NEXT:    blr
 entry:
   %0 = fcmp nnan nsz oge <2 x double> %a, %b
-  %1 = select nsz <2 x i1> %0, <2 x double> %a, <2 x double> %b
+  %1 = select nnan nsz <2 x i1> %0, <2 x double> %a, <2 x double> %b
   ret <2 x double> %1
 }
 
@@ -171,7 +171,7 @@ define <4 x float> @getsminf32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-NEXT:    blr
 entry:
   %0 = fcmp nnan nsz ole <4 x float> %a, %b
-  %1 = select nsz <4 x i1> %0, <4 x float> %a, <4 x float> %b
+  %1 = select nnan nsz <4 x i1> %0, <4 x float> %a, <4 x float> %b
   ret <4 x float> %1
 }
 
@@ -182,7 +182,7 @@ define <2 x double> @getsminf64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-NEXT:    blr
 entry:
   %0 = fcmp nnan nsz ole <2 x double> %a, %b
-  %1 = select nsz <2 x i1> %0, <2 x double> %a, <2 x double> %b
+  %1 = select nnan nsz <2 x i1> %0, <2 x double> %a, <2 x double> %b
   ret <2 x double> %1
 }
 
diff --git a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll
index 62e3d1d1c34e5..abce82087c5ba 100644
--- a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll
@@ -497,12 +497,20 @@ define float @select_fcmp_olt_pos_zero(float %x) {
 ; CHECK-LABEL: select_fcmp_olt_pos_zero:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.w.x fa5, zero
-; CHECK-NEXT:    fmin.s fa0, fa0, fa5
+; CHECK-NEXT:    flt.s a0, fa0, fa5
+; CHECK-NEXT:    bnez a0, .LBB21_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    fmv.s fa0, fa5
+; CHECK-NEXT:  .LBB21_2:
 ; CHECK-NEXT:    ret
 ;
 ; CHECKZFINX-LABEL: select_fcmp_olt_pos_zero:
 ; CHECKZFINX:       # %bb.0:
-; CHECKZFINX-NEXT:    fmin.s a0, a0, zero
+; CHECKZFINX-NEXT:    flt.s a1, a0, zero
+; CHECKZFINX-NEXT:    bnez a1, .LBB21_2
+; CHECKZFINX-NEXT:  # %bb.1:
+; CHECKZFINX-NEXT:    li a0, 0
+; CHECKZFINX-NEXT:  .LBB21_2:
 ; CHECKZFINX-NEXT:    ret
   %cmp = fcmp olt float %x, 0.000000
   %sel = select i1 %cmp, float %x, float 0.000000
diff --git a/llvm/test/CodeGen/SystemZ/vec-max-05.ll b/llvm/test/CodeGen/SystemZ/vec-max-05.ll
index b1b0679306a90..d28d0d6685c8a 100644
--- a/llvm/test/CodeGen/SystemZ/vec-max-05.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-max-05.ll
@@ -64,7 +64,7 @@ define double @f4(double %dummy, double %val) {
 ; CHECK-NEXT:    wfmaxdb %f0, %f2, %f0, 4
 ; CHECK-NEXT:    br %r14
   %cmp = fcmp ogt double %val, 0.0
-  %ret = select i1 %cmp, double %val, double 0.0
+  %ret = select nnan nsz i1 %cmp, double %val, double 0.0
   ret double %ret
 }
 
@@ -175,7 +175,7 @@ define float @f14(float %dummy, float %val) {
 ; CHECK-NEXT:    wfmaxsb %f0, %f2, %f0, 4
 ; CHECK-NEXT:    br %r14
   %cmp = fcmp ogt float %val, 0.0
-  %ret = select i1 %cmp, float %val, float 0.0
+  %ret = select nnan nsz i1 %cmp, float %val, float 0.0
   ret float %ret
 }
 
@@ -285,7 +285,7 @@ define void @f24(ptr %ptr, ptr %dst) {
 ; CHECK-NEXT:    br %r14
   %val = load fp128, ptr %ptr
   %cmp = fcmp ogt fp128 %val, 0xL00000000000000000000000000000000
-  %res = select i1 %cmp, fp128 %val, fp128 0xL00000000000000000000000000000000
+  %res = select nnan nsz i1 %cmp, fp128 %val, fp128 0xL00000000000000000000000000000000
   store fp128 %res, ptr %dst
   ret void
 }
diff --git a/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll b/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll
index 10f9f28e599f8..03176314f2048 100644
--- a/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll
@@ -11,7 +11,7 @@ define <2 x double> @f1(<2 x double> %val) {
 ; CHECK-NEXT:    vfmaxdb %v24, %v24, %v0, 4
 ; CHECK-NEXT:    br %r14
   %cmp = fcmp ogt <2 x double> %val,  zeroinitializer
-  %ret = select <2 x i1> %cmp, <2 x double> %val, <2 x double> zeroinitializer
+  %ret = select nnan nsz <2 x i1> %cmp, <2 x double> %val, <2 x double> zeroinitializer
   ret <2 x double> %ret
 }
 
@@ -22,7 +22,7 @@ define <2 x double> @f2(<2 x double> %val) {
 ; CHECK-NEXT:    vfmindb %v24, %v24, %v0, 4
 ; CHECK-NEXT:    br %r14
   %cmp = fcmp olt <2 x double> %val,  zeroinitializer
-  %ret = select <2 x i1> %cmp, <2 x double> %val, <2 x double> zeroinitializer
+  %ret = select nnan nsz <2 x i1> %cmp, <2 x double> %val, <2 x double> zeroinitializer
   ret <2 x double> %ret
 }
 
@@ -33,7 +33,7 @@ define <4 x float> @f3(<4 x float> %val) {
 ; CHECK-NEXT:    vfmaxsb %v24, %v24, %v0, 4
 ; CHECK-NEXT:    br %r14
   %cmp = fcmp ogt <4 x float> %val,  zeroinitializer
-  %ret = select <4 x i1> %cmp, <4 x float> %val, <4 x float> zeroinitializer
+  %ret = select nnan nsz <4 x i1> %cmp, <4 x float> %val, <4 x float> zeroinitializer
   ret <4 x float> %ret
 }
 
@@ -44,7 +44,7 @@ define <4 x float> @f4(<4 x float> %val) {
 ; CHECK-NEXT:    vfminsb %v24, %v24, %v0, 4
 ; CHECK-NEXT:    br %r14
   %cmp = fcmp olt <4 x float> %val,  zeroinitializer
-  %ret = select <4 x i1> %cmp, <4 x float> %val, <4 x float> zeroinitializer
+  %ret = select nnan nsz <4 x i1> %cmp, <4 x float> %val, <4 x float> zeroinitializer
   ret <4 x float> %ret
 }
 
diff --git a/llvm/test/CodeGen/SystemZ/vec-min-05.ll b/llvm/test/CodeGen/SystemZ/vec-min-05.ll
index dca5fcd60e0ce..89678eb6293c7 100644
--- a/llvm/test/CodeGen/SystemZ/vec-min-05.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-min-05.ll
@@ -64,7 +64,7 @@ define double @f4(double %dummy, double %val) {
 ; CHECK-NEXT:    wfmindb %f0, %f2, %f0, 4
 ; CHECK-NEXT:    br %r14
   %cmp = fcmp olt double %val, 0.0
-  %ret = select i1 %cmp, double %val, double 0.0
+  %ret = select nnan nsz i1 %cmp, double %val, double 0.0
   ret double %ret
 }
 
@@ -175,7 +175,7 @@ define float @f14(float %dummy, float %val) {
 ; CHECK-NEXT:    wfminsb %f0, %f2, %f0, 4
 ; CHECK-NEXT:    br %r14
   %cmp = fcmp olt float %val, 0.0
-  %ret = select i1 %cmp, float %val, float 0.0
+  %ret = select nnan nsz i1 %cmp, float %val, float 0.0
   ret float %ret
 }
 
@@ -285,7 +285,7 @@ define void @f24(ptr %ptr, ptr %dst) {
 ; CHECK-NEXT:    br %r14
   %val = load fp128, ptr %ptr
   %cmp = fcmp olt fp128 %val, 0xL00000000000000000000000000000000
-  %res = select i1 %cmp, fp128 %val, fp128 0xL00000000000000000000000000000000
+  %res = select nnan nsz i1 %cmp, fp128 %val, fp128 0xL00000000000000000000000000000000
   store fp128 %res, ptr %dst
   ret void
 }
diff --git a/llvm/test/CodeGen/Thumb2/mve-minmax.ll b/llvm/test/CodeGen/Thumb2/mve-minmax.ll
index d536e6b72ac9c..7e1454c5f3683 100644
--- a/llvm/test/CodeGen/Thumb2/mve-minmax.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-minmax.ll
@@ -247,10 +247,26 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @maxnm_float32_t(<4 x float> %src1, <4 x float> %src2) {
 ; CHECK-MVE-LABEL: maxnm_float32_t:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vmaxnm.f32 s3, s7, s3
-; CHECK-MVE-NEXT:    vmaxnm.f32 s2, s6, s2
-; CHECK-MVE-NEXT:    vmaxnm.f32 s1, s5, s1
-; CHECK-MVE-NEXT:    vmaxnm.f32 s0, s4, s0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vcmp.f32 s7, s3
+; CHECK-MVE-NEXT:    cset r0, gt
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vcmp.f32 s5, s1
+; CHECK-MVE-NEXT:    cset r1, gt
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vcmp.f32 s6, s2
+; CHECK-MVE-NEXT:    cset r2, gt
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    cset r3, gt
+; CHECK-MVE-NEXT:    cmp r3, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s2, s2, s6
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s1, s1, s5
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s3, s7
+; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s0, s0, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: maxnm_float32_t:
@@ -259,7 +275,7 @@ define arm_aapcs_vfpcc <4 x float> @maxnm_float32_t(<4 x float> %src1, <4 x floa
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %cmp = fcmp fast ogt <4 x float> %src2, %src1
-  %0 = select <4 x i1> %cmp, <4 x float> %src2, <4 x float> %src1
+  %0 = select nnan nsz <4 x i1> %cmp, <4 x float> %src2, <4 x float> %src1
   ret <4 x float> %0
 }
 
@@ -268,23 +284,55 @@ define arm_aapcs_vfpcc <8 x half> @minnm_float16_t(<8 x half> %src1, <8 x half>
 ; CHECK-MVE:       @ %bb.0: @ %entry
 ; CHECK-MVE-NEXT:    vmovx.f16 s8, s0
 ; CHECK-MVE-NEXT:    vmovx.f16 s10, s4
-; CHECK-MVE-NEXT:    vminnm.f16 s0, s4, s0
-; CHECK-MVE-NEXT:    vminnm.f16 s8, s10, s8
-; CHECK-MVE-NEXT:    vins.f16 s0, s8
+; CHECK-MVE-NEXT:    vcmp.f16 s10, s8
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    cset r0, gt
+; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s8, s10, s8
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    cset r0, gt
+; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s0, s4, s0
 ; CHECK-MVE-NEXT:    vmovx.f16 s4, s1
+; CHECK-MVE-NEXT:    vins.f16 s0, s8
 ; CHECK-MVE-NEXT:    vmovx.f16 s8, s5
-; CHECK-MVE-NEXT:    vminnm.f16 s1, s5, s1
-; CHECK-MVE-NEXT:    vminnm.f16 s4, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s8, s4
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vcmp.f16 s5, s1
+; CHECK-MVE-NEXT:    cset r0, gt
+; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s4, s8, s4
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vmovx.f16 s8, s6
+; CHECK-MVE-NEXT:    cset r0, gt
+; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s1, s5, s1
 ; CHECK-MVE-NEXT:    vins.f16 s1, s4
 ; CHECK-MVE-NEXT:    vmovx.f16 s4, s2
-; CHECK-MVE-NEXT:    vminnm.f16 s2, s6, s2
-; CHECK-MVE-NEXT:    vminnm.f16 s4, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s8, s4
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s2
+; CHECK-MVE-NEXT:    cset r0, gt
+; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s4, s8, s4
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    cset r0, gt
+; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s2, s6, s2
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s7
 ; CHECK-MVE-NEXT:    vins.f16 s2, s4
 ; CHECK-MVE-NEXT:    vmovx.f16 s4, s3
-; CHECK-MVE-NEXT:    vmovx.f16 s6, s7
-; CHECK-MVE-NEXT:    vminnm.f16 s3, s7, s3
-; CHECK-MVE-NEXT:    vminnm.f16 s4, s6, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vcmp.f16 s7, s3
+; CHECK-MVE-NEXT:    cset r0, gt
+; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s4, s6, s4
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    cset r0, gt
+; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s3, s7, s3
 ; CHECK-MVE-NEXT:    vins.f16 s3, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -294,7 +342,7 @@ define arm_aapcs_vfpcc <8 x half> @minnm_float16_t(<8 x half> %src1, <8 x half>
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %cmp = fcmp fast ogt <8 x half> %src2, %src1
-  %0 = select <8 x i1> %cmp, <8 x half> %src1, <8 x half> %src2
+  %0 = select nnan nsz <8 x i1> %cmp, <8 x half> %src1, <8 x half> %src2
   ret <8 x half> %0
 }
 
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-selectop.ll b/llvm/test/CodeGen/Thumb2/mve-pred-selectop.ll
index eeb1d0d1e7dbc..751a5dcbda177 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-selectop.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-selectop.ll
@@ -747,7 +747,7 @@ define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32(<4 x float> %z, <4 x flo
 entry:
   %c = fcmp oeq <4 x float> %z, zeroinitializer
   %a1 = fcmp fast olt <4 x float> %x, %y
-  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
+  %a = select nnan nsz <4 x i1> %a1, <4 x float> %x, <4 x float> %y
   %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
   ret <4 x float> %b
 }
@@ -761,7 +761,7 @@ define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16(<8 x half> %z, <8 x half>
 entry:
   %c = fcmp oeq <8 x half> %z, zeroinitializer
   %a1 = fcmp fast olt <8 x half> %x, %y
-  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
+  %a = select nnan nsz <8 x i1> %a1, <8 x half> %x, <8 x half> %y
   %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
   ret <8 x half> %b
 }
@@ -775,7 +775,7 @@ define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32(<4 x float> %z, <4 x flo
 entry:
   %c = fcmp oeq <4 x float> %z, zeroinitializer
   %a1 = fcmp fast ogt <4 x float> %x, %y
-  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
+  %a = select nnan nsz <4 x i1> %a1, <4 x float> %x, <4 x float> %y
   %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
   ret <4 x float> %b
 }
@@ -789,7 +789,7 @@ define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16(<8 x half> %z, <8 x half>
 entry:
   %c = fcmp oeq <8 x half> %z, zeroinitializer
   %a1 = fcmp fast ogt <8 x half> %x, %y
-  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
+  %a = select nnan nsz <8 x i1> %a1, <8 x half> %x, <8 x half> %y
   %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
   ret <8 x half> %b
 }
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-selectop2.ll b/llvm/test/CodeGen/Thumb2/mve-pred-selectop2.ll
index de7af894bd4fb..e0350ff2fdf0e 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-selectop2.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-selectop2.ll
@@ -859,7 +859,7 @@ define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_x(<4 x float> %x, <4 x f
 entry:
   %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
   %a1 = fcmp fast olt <4 x float> %x, %y
-  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
+  %a = select nnan nsz <4 x i1> %a1, <4 x float> %x, <4 x float> %y
   %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
   ret <4 x float> %b
 }
@@ -874,7 +874,7 @@ define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_x(<8 x half> %x, <8 x hal
 entry:
   %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
   %a1 = fcmp fast olt <8 x half> %x, %y
-  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
+  %a = select nnan nsz <8 x i1> %a1, <8 x half> %x, <8 x half> %y
   %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
   ret <8 x half> %b
 }
@@ -889,7 +889,7 @@ define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_x(<4 x float> %x, <4 x f
 entry:
   %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
   %a1 = fcmp fast ogt <4 x float> %x, %y
-  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
+  %a = select nnan nsz <4 x i1> %a1, <4 x float> %x, <4 x float> %y
   %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
   ret <4 x float> %b
 }
@@ -904,7 +904,7 @@ define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_x(<8 x half> %x, <8 x hal
 entry:
   %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
   %a1 = fcmp fast ogt <8 x half> %x, %y
-  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
+  %a = select nnan nsz <8 x i1> %a1, <8 x half> %x, <8 x half> %y
   %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
   ret <8 x half> %b
 }
@@ -2435,7 +2435,7 @@ define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_y(<4 x float> %x, <4 x f
 entry:
   %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
   %a1 = fcmp fast olt <4 x float> %x, %y
-  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
+  %a = select nnan nsz <4 x i1> %a1, <4 x float> %x, <4 x float> %y
   %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
   ret <4 x float> %b
 }
@@ -2451,7 +2451,7 @@ define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_y(<8 x half> %x, <8 x hal
 entry:
   %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
   %a1 = fcmp fast olt <8 x half> %x, %y
-  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
+  %a = select nnan nsz <8 x i1> %a1, <8 x half> %x, <8 x half> %y
   %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
   ret <8 x half> %b
 }
@@ -2467,7 +2467,7 @@ define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_y(<4 x float> %x, <4 x f
 entry:
   %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
   %a1 = fcmp fast ogt <4 x float> %x, %y
-  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
+  %a = select nnan nsz <4 x i1> %a1, <4 x float> %x, <4 x float> %y
   %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
   ret <4 x float> %b
 }
@@ -2483,7 +2483,7 @@ define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_y(<8 x half> %x, <8 x hal
 entry:
   %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
   %a1 = fcmp fast ogt <8 x half> %x, %y
-  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
+  %a = select nnan nsz <8 x i1> %a1, <8 x half> %x, <8 x half> %y
   %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
   ret <8 x half> %b
 }
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-selectop3.ll b/llvm/test/CodeGen/Thumb2/mve-pred-selectop3.ll
index 080c6c1a1efdc..e45a6281abb27 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-selectop3.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-selectop3.ll
@@ -913,7 +913,7 @@ define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_x(<4 x float> %x, <4 x f
 entry:
   %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
   %a1 = fcmp fast olt <4 x float> %x, %y
-  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
+  %a = select nnan nsz <4 x i1> %a1, <4 x float> %x, <4 x float> %y
   %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
   ret <4 x float> %b
 }
@@ -928,7 +928,7 @@ define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_x(<8 x half> %x, <8 x hal
 entry:
   %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
   %a1 = fcmp fast olt <8 x half> %x, %y
-  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
+  %a = select nnan nsz <8 x i1> %a1, <8 x half> %x, <8 x half> %y
   %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
   ret <8 x half> %b
 }
@@ -943,7 +943,7 @@ define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_x(<4 x float> %x, <4 x f
 entry:
   %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
   %a1 = fcmp fast ogt <4 x float> %x, %y
-  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
+  %a = select nnan nsz <4 x i1> %a1, <4 x float> %x, <4 x float> %y
   %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
   ret <4 x float> %b
 }
@@ -958,7 +958,7 @@ define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_x(<8 x half> %x, <8 x hal
 entry:
   %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
   %a1 = fcmp fast ogt <8 x half> %x, %y
-  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
+  %a = select nnan nsz <8 x i1> %a1, <8 x half> %x, <8 x half> %y
   %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
   ret <8 x half> %b
 }
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
index be737961e3ae7..26440f5087780 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
@@ -372,7 +372,7 @@ define arm_aapcs_vfpcc float @fmin_v2f32_acc(<2 x float> %x, float %y) {
 entry:
   %z = call fast float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
   %c = fcmp fast olt float %y, %z
-  %r = select i1 %c, float %y, float %z
+  %r = select nnan nsz i1 %c, float %y, float %z
   ret float %r
 }
 
@@ -395,7 +395,7 @@ define arm_aapcs_vfpcc float @fmin_v4f32_acc(<4 x float> %x, float %y) {
 entry:
   %z = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
   %c = fcmp fast olt float %y, %z
-  %r = select i1 %c, float %y, float %z
+  %r = select nnan nsz i1 %c, float %y, float %z
   ret float %r
 }
 
@@ -423,7 +423,7 @@ define arm_aapcs_vfpcc float @fmin_v8f32_acc(<8 x float> %x, float %y) {
 entry:
   %z = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
   %c = fcmp fast olt float %y, %z
-  %r = select i1 %c, float %y, float %z
+  %r = select nnan nsz i1 %c, float %y, float %z
   ret float %r
 }
 
@@ -450,7 +450,7 @@ define arm_aapcs_vfpcc half @fmin_v4f16_acc(<4 x half> %x, half %y) {
 entry:
   %z = call fast half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
   %c = fcmp fast olt half %y, %z
-  %r = select i1 %c, half %y, half %z
+  %r = select nnan nsz i1 %c, half %y, half %z
   ret half %r
 }
 
@@ -464,7 +464,7 @@ define arm_aapcs_vfpcc half @fmin_v2f16_acc(<2 x half> %x, half %y) {
 entry:
   %z = call fast half @llvm.vector.reduce.fmin.v2f16(<2 x half> %x)
   %c = fcmp fast olt half %y, %z
-  %r = select i1 %c, half %y, half %z
+  %r = select nnan nsz i1 %c, half %y, half %z
   ret half %r
 }
 
@@ -497,7 +497,7 @@ define arm_aapcs_vfpcc half @fmin_v8f16_acc(<8 x half> %x, half %y) {
 entry:
   %z = call fast half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
   %c = fcmp fast olt half %y, %z
-  %r = select i1 %c, half %y, half %z
+  %r = select nnan nsz i1 %c, half %y, half %z
   ret half %r
 }
 
@@ -543,7 +543,7 @@ define arm_aapcs_vfpcc half @fmin_v16f16_acc(<16 x half> %x, half %y) {
 entry:
   %z = call fast half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
   %c = fcmp fast olt half %y, %z
-  %r = select i1 %c, half %y, half %z
+  %r = select nnan nsz i1 %c, half %y, half %z
   ret half %r
 }
 
@@ -555,7 +555,7 @@ define arm_aapcs_vfpcc double @fmin_v1f64_acc(<1 x double> %x, double %y) {
 entry:
   %z = call fast double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
   %c = fcmp fast olt double %y, %z
-  %r = select i1 %c, double %y, double %z
+  %r = select nnan nsz i1 %c, double %y, double %z
   ret double %r
 }
 
@@ -568,7 +568,7 @@ define arm_aapcs_vfpcc double @fmin_v2f64_acc(<2 x double> %x, double %y) {
 entry:
   %z = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
   %c = fcmp fast olt double %y, %z
-  %r = select i1 %c, double %y, double %z
+  %r = select nnan nsz i1 %c, double %y, double %z
   ret double %r
 }
 
@@ -587,7 +587,7 @@ define arm_aapcs_vfpcc double @fmin_v4f64_acc(<4 x double> %x, double %y) {
 entry:
   %z = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
   %c = fcmp fast olt double %y, %z
-  %r = select i1 %c, double %y, double %z
+  %r = select nnan nsz i1 %c, double %y, double %z
   ret double %r
 }
 
@@ -1198,7 +1198,7 @@ define arm_aapcs_vfpcc float @fmax_v2f32_acc(<2 x float> %x, float %y) {
 entry:
   %z = call fast float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
   %c = fcmp fast ogt float %y, %z
-  %r = select i1 %c, float %y, float %z
+  %r = select nnan nsz i1 %c, float %y, float %z
   ret float %r
 }
 
@@ -1221,7 +1221,7 @@ define arm_aapcs_vfpcc float @fmax_v4f32_acc(<4 x float> %x, float %y) {
 entry:
   %z = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x)
   %c = fcmp fast ogt float %y, %z
-  %r = select i1 %c, float %y, float %z
+  %r = select nnan nsz i1 %c, float %y, float %z
   ret float %r
 }
 
@@ -1249,7 +1249,7 @@ define arm_aapcs_vfpcc float @fmax_v8f32_acc(<8 x float> %x, float %y) {
 entry:
   %z = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
   %c = fcmp fast ogt float %y, %z
-  %r = select i1 %c, float %y, float %z
+  %r = select nnan nsz i1 %c, float %y, float %z
   ret float %r
 }
 
@@ -1263,7 +1263,7 @@ define arm_aapcs_vfpcc half @fmax_v2f16_acc(<2 x half> %x, half %y) {
 entry:
   %z = call fast half @llvm.vector.reduce.fmax.v2f16(<2 x half> %x)
   %c = fcmp fast ogt half %y, %z
-  %r = select i1 %c, half %y, half %z
+  %r = select nnan nsz i1 %c, half %y, half %z
   ret half %r
 }
 
@@ -1290,7 +1290,7 @@ define arm_aapcs_vfpcc half @fmax_v4f16_acc(<4 x half> %x, half %y) {
 entry:
   %z = call fast half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
   %c = fcmp fast ogt half %y, %z
-  %r = select i1 %c, half %y, half %z
+  %r = select nnan nsz i1 %c, half %y, half %z
   ret half %r
 }
 
@@ -1323,7 +1323,7 @@ define arm_aapcs_vfpcc half @fmax_v8f16_acc(<8 x half> %x, half %y) {
 entry:
   %z = call fast half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
   %c = fcmp fast ogt half %y, %z
-  %r = select i1 %c, half %y, half %z
+  %r = select nnan nsz i1 %c, half %y, half %z
   ret half %r
 }
 
@@ -1369,7 +1369,7 @@ define arm_aapcs_vfpcc half @fmax_v16f16_acc(<16 x half> %x, half %y) {
 entry:
   %z = call fast half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
   %c = fcmp fast ogt half %y, %z
-  %r = select i1 %c, half %y, half %z
+  %r = select nnan nsz i1 %c, half %y, half %z
   ret half %r
 }
 
@@ -1381,7 +1381,7 @@ define arm_aapcs_vfpcc double @fmax_v1f64_acc(<1 x double> %x, double %y) {
 entry:
   %z = call fast double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x)
   %c = fcmp fast ogt double %y, %z
-  %r = select i1 %c, double %y, double %z
+  %r = select nnan nsz i1 %c, double %y, double %z
   ret double %r
 }
 
@@ -1394,7 +1394,7 @@ define arm_aapcs_vfpcc double @fmax_v2f64_acc(<2 x double> %x, double %y) {
 entry:
   %z = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x)
   %c = fcmp fast ogt double %y, %z
-  %r = select i1 %c, double %y, double %z
+  %r = select nnan nsz i1 %c, double %y, double %z
   ret double %r
 }
 
@@ -1413,7 +1413,7 @@ define arm_aapcs_vfpcc double @fmax_v4f64_acc(<4 x double> %x, double %y) {
 entry:
   %z = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x)
   %c = fcmp fast ogt double %y, %z
-  %r = select i1 %c, double %y, double %z
+  %r = select nnan nsz i1 %c, double %y, double %z
   ret double %r
 }
 

>From 1f0394697659f6a6fd66c59b4209c0e0a3b1c6bb Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 19 Jan 2026 04:43:30 +0800
Subject: [PATCH 4/4] [AMDGPU] Fix AMDGPU tests. NFC.

---
 llvm/test/CodeGen/AMDGPU/fmax_legacy.ll | 18 +++++++++++-----
 llvm/test/CodeGen/AMDGPU/fmin_legacy.ll | 28 +++++++++++++++++--------
 2 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll b/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll
index f3a84e6e45260..b7830d13ce8e9 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll
@@ -265,7 +265,9 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(ptr addrspace(1) %out, ptr
 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 
-; GCN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
+; GCN: v_mul_f32_e32 [[CA:v[0-9]+]], 1.0, [[A]]
+; GCN: v_mul_f32_e32 [[CB:v[0-9]+]], 1.0, [[B]]
+; GCN: v_max_f32_e32 {{v[0-9]+}}, [[CA]], [[CB]]
 ; EG: MAX
 define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32_fast(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
@@ -312,11 +314,17 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(ptr addrspace(1) %out, ptr
 
 ; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v3f32_fast:
 
-; GCN: v_max_f32_e32
-; GCN: v_max_f32_e32
-; GCN: v_max_f32_e32
+; VI: v_cmp_gt_f32_e32
+; VI: v_cndmask_b32_e32
+; VI: v_cmp_gt_f32_e32
+; VI: v_cndmask_b32_e32
+; VI: v_cmp_gt_f32_e32
+; VI: v_cndmask_b32_e32
 
-; GCN-NOT: v_max
+; SI: v_max_legacy_f32_e32
+; SI: v_max_legacy_f32_e32
+; SI: v_max_legacy_f32_e32
+; SI-NOT: v_max_
 define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32_fast(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr <3 x float>, ptr addrspace(1) %in, i32 %tid
diff --git a/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll b/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
index 39eefa1879870..146864314b301 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
@@ -306,7 +306,9 @@ define amdgpu_kernel void @test_fmin_legacy_ult_v1f32(ptr addrspace(1) %out, ptr
 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 
-; GCN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
+; GCN: v_mul_f32_e32 [[CA:v[0-9]+]], 1.0, [[A]]
+; GCN: v_mul_f32_e32 [[CB:v[0-9]+]], 1.0, [[B]]
+; GCN: v_min_f32_e32 {{v[0-9]+}}, [[CA]], [[CB]]
 define amdgpu_kernel void @test_fmin_legacy_ult_v1f32_fast(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr <1 x float>, ptr addrspace(1) %in, i32 %tid
@@ -349,8 +351,12 @@ define amdgpu_kernel void @test_fmin_legacy_ult_v2f32(ptr addrspace(1) %out, ptr
 ; GCN: {{buffer|flat}}_load_dwordx2
 ; GCN: {{buffer|flat}}_load_dwordx2
 
-; GCN: v_min_f32_e32
-; GCN: v_min_f32_e32
+; SI: v_min_legacy_f32_e32
+; SI: v_min_legacy_f32_e32
+; VI: v_cmp_nge_f32_e32
+; VI: v_cndmask_b32_e32
+; VI: v_cmp_nge_f32_e32
+; VI: v_cndmask_b32_e32
 define amdgpu_kernel void @test_fmin_legacy_ult_v2f32_fast(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr <2 x float>, ptr addrspace(1) %in, i32 %tid
@@ -394,13 +400,17 @@ define amdgpu_kernel void @test_fmin_legacy_ult_v3f32(ptr addrspace(1) %out, ptr
 }
 
 ; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v3f32_fast:
-; VI-NOT: v_cmp
-; VI-NOT: v_cndmask
+; VI: v_cmp_nge_f32_e32
+; VI: v_cndmask_b32_e32
+; VI: v_cmp_nge_f32_e32
+; VI: v_cndmask_b32_e32
+; VI: v_cmp_nge_f32_e32
+; VI: v_cndmask_b32_e32
 
-; GCN: v_min_f32_e32
-; GCN: v_min_f32_e32
-; GCN: v_min_f32_e32
-; GCN-NOT: v_min_
+; SI: v_min_legacy_f32_e32
+; SI: v_min_legacy_f32_e32
+; SI: v_min_legacy_f32_e32
+; SI-NOT: v_min_
 define amdgpu_kernel void @test_fmin_legacy_ult_v3f32_fast(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr <3 x float>, ptr addrspace(1) %in, i32 %tid