[llvm] [SDAG] Drop select -> fmax/min folding in SelectionDAGBuilder (PR #93575)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 18 12:43:54 PST 2026
https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/93575
>From 47733cae5e4b8919702d8f5a1de513bbdb2dc4b4 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Tue, 28 May 2024 23:45:55 +0800
Subject: [PATCH 1/4] [RISCV][SDAG] Add pre-commit tests for PR93414. NFC.
---
llvm/test/CodeGen/RISCV/float-select-fcmp.ll | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll
index f08777ac3e5de..62e3d1d1c34e5 100644
--- a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll
@@ -490,3 +490,21 @@ entry:
%retval.0 = select i1 %cmp, float 1.000000e+00, float %.a
ret float %retval.0
}
+
+; Test from PR93414
+; Make sure that we don't use fmin.s here to handle signed zero correctly.
+define float @select_fcmp_olt_pos_zero(float %x) {
+; CHECK-LABEL: select_fcmp_olt_pos_zero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.w.x fa5, zero
+; CHECK-NEXT: fmin.s fa0, fa0, fa5
+; CHECK-NEXT: ret
+;
+; CHECKZFINX-LABEL: select_fcmp_olt_pos_zero:
+; CHECKZFINX: # %bb.0:
+; CHECKZFINX-NEXT: fmin.s a0, a0, zero
+; CHECKZFINX-NEXT: ret
+ %cmp = fcmp olt float %x, 0.000000
+ %sel = select i1 %cmp, float %x, float 0.000000
+ ret float %sel
+}
>From 81366134480626e8734866a37d2ec868a565aad2 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 19 Jan 2026 01:39:53 +0800
Subject: [PATCH 2/4] [SDAG] Drop select -> fmax/min folding in SelectionDAGBuilder
---
.../SelectionDAG/SelectionDAGBuilder.cpp | 26 -------------------
1 file changed, 26 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 27131e14141cc..93c46b8225112 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3864,32 +3864,6 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
case SPF_UMIN: Opc = ISD::UMIN; break;
case SPF_SMAX: Opc = ISD::SMAX; break;
case SPF_SMIN: Opc = ISD::SMIN; break;
- case SPF_FMINNUM:
- switch (SPR.NaNBehavior) {
- case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
- case SPNB_RETURNS_NAN: break;
- case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
- case SPNB_RETURNS_ANY:
- if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ||
- (UseScalarMinMax &&
- TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType())))
- Opc = ISD::FMINNUM;
- break;
- }
- break;
- case SPF_FMAXNUM:
- switch (SPR.NaNBehavior) {
- case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
- case SPNB_RETURNS_NAN: break;
- case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
- case SPNB_RETURNS_ANY:
- if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) ||
- (UseScalarMinMax &&
- TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType())))
- Opc = ISD::FMAXNUM;
- break;
- }
- break;
case SPF_NABS:
Negate = true;
[[fallthrough]];
>From a2a574543c1e44d62ff37fc10d9a59428f021870 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 19 Jan 2026 03:57:49 +0800
Subject: [PATCH 3/4] [CodeGen] Fix trivial tests. NFC.
---
llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll | 4 +-
llvm/test/CodeGen/AArch64/arm64-fmax.ll | 6 +-
llvm/test/CodeGen/AArch64/select_fmf.ll | 20 +--
.../test/CodeGen/AArch64/sve-pred-selectop.ll | 8 +-
llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll | 135 +++++++++--------
llvm/test/CodeGen/AMDGPU/fmed3.ll | 4 +-
llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll | 135 +++++++++--------
llvm/test/CodeGen/AMDGPU/reduction.ll | 24 +--
.../AMDGPU/select-flags-to-fmin-fmax.ll | 138 ++++++++++--------
llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll | 28 ++--
.../test/CodeGen/ARM/fp16-vminmaxnm-vector.ll | 64 ++++----
.../CodeGen/ARM/minnum-maxnum-intrinsics.ll | 2 +-
llvm/test/CodeGen/ARM/vminmaxnm-safe.ll | 40 ++---
llvm/test/CodeGen/PowerPC/vec-min-max.ll | 8 +-
llvm/test/CodeGen/RISCV/float-select-fcmp.ll | 12 +-
llvm/test/CodeGen/SystemZ/vec-max-05.ll | 6 +-
.../CodeGen/SystemZ/vec-max-min-zerosplat.ll | 8 +-
llvm/test/CodeGen/SystemZ/vec-min-05.ll | 6 +-
llvm/test/CodeGen/Thumb2/mve-minmax.ll | 80 ++++++++--
llvm/test/CodeGen/Thumb2/mve-pred-selectop.ll | 8 +-
.../test/CodeGen/Thumb2/mve-pred-selectop2.ll | 16 +-
.../test/CodeGen/Thumb2/mve-pred-selectop3.ll | 8 +-
.../CodeGen/Thumb2/mve-vecreduce-fminmax.ll | 40 ++---
23 files changed, 442 insertions(+), 358 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
index aec31eced397e..c95024c8eacec 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
@@ -27,7 +27,7 @@ define double @test_cross(float %in) {
define double @test_cross_fail_nan(float %in) {
; CHECK-LABEL: test_cross_fail_nan:
%cmp = fcmp olt float %in, 0.000000e+00
- %val = select i1 %cmp, float %in, float 0.000000e+00
+ %val = select nnan nsz i1 %cmp, float %in, float 0.000000e+00
%longer = fpext float %val to double
ret double %longer
@@ -68,7 +68,7 @@ define float @minnum_fcmp_nsz(float %x, float %y) {
define float @minnum_select_nsz(float %x, float %y) {
; CHECK-LABEL: minnum_select_nsz:
%cmp = fcmp nnan ole float %x, %y
- %sel = select nsz i1 %cmp, float %x, float %y
+ %sel = select nnan nsz i1 %cmp, float %x, float %y
ret float %sel
; CHECK: fminnm s0, s0, s1
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax.ll b/llvm/test/CodeGen/AArch64/arm64-fmax.ll
index 85104775339b6..3a4eb5426e6dd 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fmax.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fmax.ll
@@ -9,7 +9,7 @@ define double @test_direct(float %in) {
; CHECK-NEXT: fcvt d0, s0
; CHECK-NEXT: ret
%cmp = fcmp nnan olt float %in, 0.000000e+00
- %val = select i1 %cmp, float 0.000000e+00, float %in
+ %val = select nsz i1 %cmp, float 0.000000e+00, float %in
%longer = fpext float %val to double
ret double %longer
}
@@ -22,7 +22,7 @@ define double @test_cross(float %in) {
; CHECK-NEXT: fcvt d0, s0
; CHECK-NEXT: ret
%cmp = fcmp nnan ult float %in, 0.000000e+00
- %val = select i1 %cmp, float %in, float 0.000000e+00
+ %val = select nsz i1 %cmp, float %in, float 0.000000e+00
%longer = fpext float %val to double
ret double %longer
}
@@ -37,7 +37,7 @@ define double @test_cross_fail_nan(float %in) {
; CHECK-NEXT: fcvt d0, s0
; CHECK-NEXT: ret
%cmp = fcmp nnan olt float %in, 0.000000e+00
- %val = select i1 %cmp, float %in, float 0.000000e+00
+ %val = select nsz i1 %cmp, float %in, float 0.000000e+00
%longer = fpext float %val to double
ret double %longer
}
diff --git a/llvm/test/CodeGen/AArch64/select_fmf.ll b/llvm/test/CodeGen/AArch64/select_fmf.ll
index 88f517af65bb6..333e25fb5b824 100644
--- a/llvm/test/CodeGen/AArch64/select_fmf.ll
+++ b/llvm/test/CodeGen/AArch64/select_fmf.ll
@@ -8,12 +8,12 @@
define float @select_select_fold_select_and(float %w, float %x, float %y, float %z) {
; CHECK-SD-LABEL: select_select_fold_select_and:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fminnm s4, s1, s2
; CHECK-SD-NEXT: fcmp s1, s2
+; CHECK-SD-NEXT: fmov s4, #0.50000000
+; CHECK-SD-NEXT: fcsel s1, s1, s2, lt
; CHECK-SD-NEXT: fmaxnm s2, s0, s3
-; CHECK-SD-NEXT: fmov s1, #0.50000000
-; CHECK-SD-NEXT: fccmp s4, s0, #4, lt
-; CHECK-SD-NEXT: fadd s1, s0, s1
+; CHECK-SD-NEXT: fccmp s1, s0, #4, lt
+; CHECK-SD-NEXT: fadd s1, s0, s4
; CHECK-SD-NEXT: fcsel s2, s2, s0, gt
; CHECK-SD-NEXT: fadd s4, s1, s2
; CHECK-SD-NEXT: fcmp s4, s1
@@ -98,13 +98,13 @@ exit: ; preds = %if.end.i159.i.i, %if.then.i
define float @select_select_fold_select_or(float %w, float %x, float %y, float %z) {
; CHECK-SD-LABEL: select_select_fold_select_or:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fminnm s4, s1, s2
; CHECK-SD-NEXT: fcmp s1, s2
-; CHECK-SD-NEXT: fmaxnm s2, s0, s3
-; CHECK-SD-NEXT: fmov s1, #0.50000000
-; CHECK-SD-NEXT: fccmp s4, s0, #0, ge
-; CHECK-SD-NEXT: fadd s1, s0, s1
-; CHECK-SD-NEXT: fcsel s2, s0, s2, gt
+; CHECK-SD-NEXT: fcsel s1, s1, s2, lt
+; CHECK-SD-NEXT: fccmp s0, s3, #0, ge
+; CHECK-SD-NEXT: fmov s2, #0.50000000
+; CHECK-SD-NEXT: fccmp s1, s0, #0, le
+; CHECK-SD-NEXT: fadd s1, s0, s2
+; CHECK-SD-NEXT: fcsel s2, s0, s3, gt
; CHECK-SD-NEXT: fadd s4, s1, s2
; CHECK-SD-NEXT: fcmp s4, s1
; CHECK-SD-NEXT: b.le .LBB1_2
diff --git a/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll b/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
index 9a78726c450d1..faabf657601a2 100644
--- a/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
@@ -660,7 +660,7 @@ define <vscale x 4 x float> @fcmp_fast_olt_v4f32(<vscale x 4 x float> %z, <vscal
entry:
%c = fcmp oeq <vscale x 4 x float> %z, zeroinitializer
%a1 = fcmp fast olt <vscale x 4 x float> %x, %y
- %a = select <vscale x 4 x i1> %a1, <vscale x 4 x float> %x, <vscale x 4 x float> %y
+ %a = select nnan nsz <vscale x 4 x i1> %a1, <vscale x 4 x float> %x, <vscale x 4 x float> %y
%b = select <vscale x 4 x i1> %c, <vscale x 4 x float> %a, <vscale x 4 x float> %z
ret <vscale x 4 x float> %b
}
@@ -676,7 +676,7 @@ define <vscale x 8 x half> @fcmp_fast_olt_v8f16(<vscale x 8 x half> %z, <vscale
entry:
%c = fcmp oeq <vscale x 8 x half> %z, zeroinitializer
%a1 = fcmp fast olt <vscale x 8 x half> %x, %y
- %a = select <vscale x 8 x i1> %a1, <vscale x 8 x half> %x, <vscale x 8 x half> %y
+ %a = select nnan nsz <vscale x 8 x i1> %a1, <vscale x 8 x half> %x, <vscale x 8 x half> %y
%b = select <vscale x 8 x i1> %c, <vscale x 8 x half> %a, <vscale x 8 x half> %z
ret <vscale x 8 x half> %b
}
@@ -692,7 +692,7 @@ define <vscale x 4 x float> @fcmp_fast_ogt_v4f32(<vscale x 4 x float> %z, <vscal
entry:
%c = fcmp oeq <vscale x 4 x float> %z, zeroinitializer
%a1 = fcmp fast ogt <vscale x 4 x float> %x, %y
- %a = select <vscale x 4 x i1> %a1, <vscale x 4 x float> %x, <vscale x 4 x float> %y
+ %a = select nnan nsz <vscale x 4 x i1> %a1, <vscale x 4 x float> %x, <vscale x 4 x float> %y
%b = select <vscale x 4 x i1> %c, <vscale x 4 x float> %a, <vscale x 4 x float> %z
ret <vscale x 4 x float> %b
}
@@ -708,7 +708,7 @@ define <vscale x 8 x half> @fcmp_fast_ogt_v8f16(<vscale x 8 x half> %z, <vscale
entry:
%c = fcmp oeq <vscale x 8 x half> %z, zeroinitializer
%a1 = fcmp fast ogt <vscale x 8 x half> %x, %y
- %a = select <vscale x 8 x i1> %a1, <vscale x 8 x half> %x, <vscale x 8 x half> %y
+ %a = select nnan nsz <vscale x 8 x i1> %a1, <vscale x 8 x half> %x, <vscale x 8 x half> %y
%b = select <vscale x 8 x i1> %c, <vscale x 8 x half> %a, <vscale x 8 x half> %z
ret <vscale x 8 x half> %b
}
diff --git a/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll b/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
index bd28f72bb8913..e6da13ec5953b 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
@@ -67,10 +67,10 @@ define half @test_fmax_legacy_ugt_f16_fast(half %a, half %b) #0 {
; SI-LABEL: test_fmax_legacy_ugt_f16_fast:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NEXT: v_max_f32_e32 v0, v0, v1
; SI-NEXT: s_setpc_b64 s[30:31]
;
@@ -168,24 +168,28 @@ define <2 x half> @test_fmax_legacy_ugt_v2f16_fast(<2 x half> %a, <2 x half> %b)
; VI-LABEL: test_fmax_legacy_ugt_v2f16_fast:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT: v_max_f16_e32 v0, v0, v1
-; VI-NEXT: v_or_b32_e32 v0, v0, v2
+; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; VI-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; VI-NEXT: v_cmp_nle_f16_e32 vcc, v3, v2
+; VI-NEXT: v_cndmask_b32_sdwa v2, v2, v3, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-NEXT: v_cmp_nle_f16_e32 vcc, v0, v1
+; VI-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; VI-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: s_setpc_b64 s[30:31]
;
; SI-LABEL: test_fmax_legacy_ugt_v2f16_fast:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; SI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SI-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SI-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SI-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-NEXT: v_max_f32_e32 v0, v0, v2
-; SI-NEXT: v_max_f32_e32 v1, v1, v3
+; SI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SI-NEXT: v_max_legacy_f32_e32 v0, v2, v0
+; SI-NEXT: v_max_legacy_f32_e32 v1, v3, v1
; SI-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_fmax_legacy_ugt_v2f16_fast:
@@ -285,6 +289,8 @@ define <3 x half> @test_fmax_legacy_ugt_v3f16_fast(<3 x half> %a, <3 x half> %b)
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_max_f16 v3, v3, v3
; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX9-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
; GFX9-NEXT: v_pk_max_f16 v1, v1, v3
; GFX9-NEXT: v_pk_max_f16 v0, v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -292,39 +298,46 @@ define <3 x half> @test_fmax_legacy_ugt_v3f16_fast(<3 x half> %a, <3 x half> %b)
; VI-LABEL: test_fmax_legacy_ugt_v3f16_fast:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT: v_max_f16_e32 v0, v0, v2
-; VI-NEXT: v_max_f16_e32 v1, v1, v3
-; VI-NEXT: v_or_b32_e32 v0, v0, v4
+; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; VI-NEXT: v_lshrrev_b32_e32 v5, 16, v0
+; VI-NEXT: v_cmp_nle_f16_e32 vcc, v5, v4
+; VI-NEXT: v_cndmask_b32_sdwa v4, v4, v5, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-NEXT: v_cmp_nle_f16_e32 vcc, v1, v3
+; VI-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-NEXT: v_cmp_nle_f16_e32 vcc, v0, v2
+; VI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: s_setpc_b64 s[30:31]
;
; SI-LABEL: test_fmax_legacy_ugt_v3f16_fast:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; SI-NEXT: v_cvt_f16_f32_e32 v4, v4
+; SI-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; SI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SI-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; SI-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; SI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; SI-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SI-NEXT: v_cvt_f32_f16_e32 v4, v4
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-NEXT: v_max_f32_e32 v0, v0, v3
-; SI-NEXT: v_max_f32_e32 v1, v1, v4
-; SI-NEXT: v_max_f32_e32 v2, v2, v5
+; SI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SI-NEXT: v_max_legacy_f32_e32 v0, v3, v0
+; SI-NEXT: v_max_legacy_f32_e32 v1, v4, v1
+; SI-NEXT: v_max_legacy_f32_e32 v2, v5, v2
; SI-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_fmax_legacy_ugt_v3f16_fast:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = fcmp ugt <3 x half> %a, %b
@@ -461,26 +474,26 @@ define <4 x half> @test_fmax_legacy_ugt_v4f16_fast(<4 x half> %a, <4 x half> %b)
; SI-LABEL: test_fmax_legacy_ugt_v4f16_fast:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_cvt_f16_f32_e32 v7, v7
; SI-NEXT: v_cvt_f16_f32_e32 v3, v3
-; SI-NEXT: v_cvt_f16_f32_e32 v6, v6
+; SI-NEXT: v_cvt_f16_f32_e32 v7, v7
; SI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; SI-NEXT: v_cvt_f16_f32_e32 v5, v5
+; SI-NEXT: v_cvt_f16_f32_e32 v6, v6
; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; SI-NEXT: v_cvt_f16_f32_e32 v4, v4
+; SI-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT: v_cvt_f32_f16_e32 v7, v7
+; SI-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; SI-NEXT: v_cvt_f32_f16_e32 v6, v6
+; SI-NEXT: v_cvt_f32_f16_e32 v7, v7
; SI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; SI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; SI-NEXT: v_cvt_f32_f16_e32 v6, v6
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; SI-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-NEXT: v_max_f32_e32 v0, v0, v4
-; SI-NEXT: v_max_f32_e32 v1, v1, v5
-; SI-NEXT: v_max_f32_e32 v2, v2, v6
-; SI-NEXT: v_max_f32_e32 v3, v3, v7
+; SI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; SI-NEXT: v_max_legacy_f32_e32 v0, v4, v0
+; SI-NEXT: v_max_legacy_f32_e32 v1, v5, v1
+; SI-NEXT: v_max_legacy_f32_e32 v2, v6, v2
+; SI-NEXT: v_max_legacy_f32_e32 v3, v7, v3
; SI-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_fmax_legacy_ugt_v4f16_fast:
@@ -701,46 +714,46 @@ define <8 x half> @test_fmax_legacy_ugt_v8f16_fast(<8 x half> %a, <8 x half> %b)
; SI-LABEL: test_fmax_legacy_ugt_v8f16_fast:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_cvt_f16_f32_e32 v15, v15
; SI-NEXT: v_cvt_f16_f32_e32 v7, v7
-; SI-NEXT: v_cvt_f16_f32_e32 v14, v14
+; SI-NEXT: v_cvt_f16_f32_e32 v15, v15
; SI-NEXT: v_cvt_f16_f32_e32 v6, v6
-; SI-NEXT: v_cvt_f16_f32_e32 v13, v13
+; SI-NEXT: v_cvt_f16_f32_e32 v14, v14
; SI-NEXT: v_cvt_f16_f32_e32 v5, v5
-; SI-NEXT: v_cvt_f16_f32_e32 v12, v12
+; SI-NEXT: v_cvt_f16_f32_e32 v13, v13
; SI-NEXT: v_cvt_f16_f32_e32 v4, v4
-; SI-NEXT: v_cvt_f16_f32_e32 v11, v11
+; SI-NEXT: v_cvt_f16_f32_e32 v12, v12
; SI-NEXT: v_cvt_f16_f32_e32 v3, v3
-; SI-NEXT: v_cvt_f16_f32_e32 v10, v10
+; SI-NEXT: v_cvt_f16_f32_e32 v11, v11
; SI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; SI-NEXT: v_cvt_f16_f32_e32 v9, v9
+; SI-NEXT: v_cvt_f16_f32_e32 v10, v10
; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; SI-NEXT: v_cvt_f16_f32_e32 v8, v8
+; SI-NEXT: v_cvt_f16_f32_e32 v9, v9
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT: v_cvt_f32_f16_e32 v15, v15
+; SI-NEXT: v_cvt_f16_f32_e32 v8, v8
; SI-NEXT: v_cvt_f32_f16_e32 v7, v7
-; SI-NEXT: v_cvt_f32_f16_e32 v14, v14
+; SI-NEXT: v_cvt_f32_f16_e32 v15, v15
; SI-NEXT: v_cvt_f32_f16_e32 v6, v6
-; SI-NEXT: v_cvt_f32_f16_e32 v13, v13
+; SI-NEXT: v_cvt_f32_f16_e32 v14, v14
; SI-NEXT: v_cvt_f32_f16_e32 v5, v5
-; SI-NEXT: v_cvt_f32_f16_e32 v12, v12
+; SI-NEXT: v_cvt_f32_f16_e32 v13, v13
; SI-NEXT: v_cvt_f32_f16_e32 v4, v4
-; SI-NEXT: v_cvt_f32_f16_e32 v11, v11
+; SI-NEXT: v_cvt_f32_f16_e32 v12, v12
; SI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; SI-NEXT: v_cvt_f32_f16_e32 v10, v10
+; SI-NEXT: v_cvt_f32_f16_e32 v11, v11
; SI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; SI-NEXT: v_cvt_f32_f16_e32 v9, v9
+; SI-NEXT: v_cvt_f32_f16_e32 v10, v10
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-NEXT: v_cvt_f32_f16_e32 v8, v8
+; SI-NEXT: v_cvt_f32_f16_e32 v9, v9
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-NEXT: v_max_f32_e32 v0, v0, v8
-; SI-NEXT: v_max_f32_e32 v1, v1, v9
-; SI-NEXT: v_max_f32_e32 v2, v2, v10
-; SI-NEXT: v_max_f32_e32 v3, v3, v11
-; SI-NEXT: v_max_f32_e32 v4, v4, v12
-; SI-NEXT: v_max_f32_e32 v5, v5, v13
-; SI-NEXT: v_max_f32_e32 v6, v6, v14
-; SI-NEXT: v_max_f32_e32 v7, v7, v15
+; SI-NEXT: v_cvt_f32_f16_e32 v8, v8
+; SI-NEXT: v_max_legacy_f32_e32 v0, v8, v0
+; SI-NEXT: v_max_legacy_f32_e32 v1, v9, v1
+; SI-NEXT: v_max_legacy_f32_e32 v2, v10, v2
+; SI-NEXT: v_max_legacy_f32_e32 v3, v11, v3
+; SI-NEXT: v_max_legacy_f32_e32 v4, v12, v4
+; SI-NEXT: v_max_legacy_f32_e32 v5, v13, v5
+; SI-NEXT: v_max_legacy_f32_e32 v6, v14, v6
+; SI-NEXT: v_max_legacy_f32_e32 v7, v15, v7
; SI-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_fmax_legacy_ugt_v8f16_fast:
diff --git a/llvm/test/CodeGen/AMDGPU/fmed3.ll b/llvm/test/CodeGen/AMDGPU/fmed3.ll
index 29163c111fc5e..19fa6151c406b 100644
--- a/llvm/test/CodeGen/AMDGPU/fmed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmed3.ll
@@ -1105,11 +1105,11 @@ define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out,
; fmax_legacy
%cmp0 = fcmp ule float %a.nnan, 2.0
- %max = select i1 %cmp0, float 2.0, float %a.nnan
+ %max = select nnan nsz i1 %cmp0, float 2.0, float %a.nnan
; fmin_legacy
%cmp1 = fcmp uge float %max, 4.0
- %med = select i1 %cmp1, float 4.0, float %max
+ %med = select nnan nsz i1 %cmp1, float 4.0, float %max
store float %med, ptr addrspace(1) %outgep
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll b/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
index 40c2ec0a39f51..e7ebe5cfbb189 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
@@ -68,10 +68,10 @@ define half @test_fmin_legacy_ule_f16_fast(half %a, half %b) #0 {
; SI-LABEL: test_fmin_legacy_ule_f16_fast:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NEXT: v_min_f32_e32 v0, v0, v1
; SI-NEXT: s_setpc_b64 s[30:31]
;
@@ -169,24 +169,28 @@ define <2 x half> @test_fmin_legacy_ule_v2f16_fast(<2 x half> %a, <2 x half> %b)
; VI-LABEL: test_fmin_legacy_ule_v2f16_fast:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT: v_min_f16_e32 v0, v0, v1
-; VI-NEXT: v_or_b32_e32 v0, v0, v2
+; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; VI-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; VI-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v2
+; VI-NEXT: v_cndmask_b32_sdwa v2, v2, v3, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1
+; VI-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; VI-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: s_setpc_b64 s[30:31]
;
; SI-LABEL: test_fmin_legacy_ule_v2f16_fast:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; SI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SI-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SI-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SI-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-NEXT: v_min_f32_e32 v0, v0, v2
-; SI-NEXT: v_min_f32_e32 v1, v1, v3
+; SI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SI-NEXT: v_min_legacy_f32_e32 v0, v2, v0
+; SI-NEXT: v_min_legacy_f32_e32 v1, v3, v1
; SI-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_fmin_legacy_ule_v2f16_fast:
@@ -286,6 +290,8 @@ define <3 x half> @test_fmin_legacy_ule_v3f16_fast(<3 x half> %a, <3 x half> %b)
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_max_f16 v3, v3, v3
; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX9-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
; GFX9-NEXT: v_pk_min_f16 v0, v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -293,39 +299,46 @@ define <3 x half> @test_fmin_legacy_ule_v3f16_fast(<3 x half> %a, <3 x half> %b)
; VI-LABEL: test_fmin_legacy_ule_v3f16_fast:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT: v_min_f16_e32 v0, v0, v2
-; VI-NEXT: v_min_f16_e32 v1, v1, v3
-; VI-NEXT: v_or_b32_e32 v0, v0, v4
+; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; VI-NEXT: v_lshrrev_b32_e32 v5, 16, v0
+; VI-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4
+; VI-NEXT: v_cndmask_b32_sdwa v4, v4, v5, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3
+; VI-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2
+; VI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: s_setpc_b64 s[30:31]
;
; SI-LABEL: test_fmin_legacy_ule_v3f16_fast:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; SI-NEXT: v_cvt_f16_f32_e32 v4, v4
+; SI-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; SI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SI-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; SI-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; SI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; SI-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SI-NEXT: v_cvt_f32_f16_e32 v4, v4
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-NEXT: v_min_f32_e32 v0, v0, v3
-; SI-NEXT: v_min_f32_e32 v1, v1, v4
-; SI-NEXT: v_min_f32_e32 v2, v2, v5
+; SI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SI-NEXT: v_min_legacy_f32_e32 v0, v3, v0
+; SI-NEXT: v_min_legacy_f32_e32 v1, v4, v1
+; SI-NEXT: v_min_legacy_f32_e32 v2, v5, v2
; SI-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_fmin_legacy_ule_v3f16_fast:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = fcmp ule <3 x half> %a, %b
@@ -462,26 +475,26 @@ define <4 x half> @test_fmin_legacy_ule_v4f16_fast(<4 x half> %a, <4 x half> %b)
; SI-LABEL: test_fmin_legacy_ule_v4f16_fast:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_cvt_f16_f32_e32 v7, v7
; SI-NEXT: v_cvt_f16_f32_e32 v3, v3
-; SI-NEXT: v_cvt_f16_f32_e32 v6, v6
+; SI-NEXT: v_cvt_f16_f32_e32 v7, v7
; SI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; SI-NEXT: v_cvt_f16_f32_e32 v5, v5
+; SI-NEXT: v_cvt_f16_f32_e32 v6, v6
; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; SI-NEXT: v_cvt_f16_f32_e32 v4, v4
+; SI-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT: v_cvt_f32_f16_e32 v7, v7
+; SI-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; SI-NEXT: v_cvt_f32_f16_e32 v6, v6
+; SI-NEXT: v_cvt_f32_f16_e32 v7, v7
; SI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; SI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; SI-NEXT: v_cvt_f32_f16_e32 v6, v6
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; SI-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-NEXT: v_min_f32_e32 v0, v0, v4
-; SI-NEXT: v_min_f32_e32 v1, v1, v5
-; SI-NEXT: v_min_f32_e32 v2, v2, v6
-; SI-NEXT: v_min_f32_e32 v3, v3, v7
+; SI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; SI-NEXT: v_min_legacy_f32_e32 v0, v4, v0
+; SI-NEXT: v_min_legacy_f32_e32 v1, v5, v1
+; SI-NEXT: v_min_legacy_f32_e32 v2, v6, v2
+; SI-NEXT: v_min_legacy_f32_e32 v3, v7, v3
; SI-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_fmin_legacy_ule_v4f16_fast:
@@ -702,46 +715,46 @@ define <8 x half> @test_fmin_legacy_ule_v8f16_fast(<8 x half> %a, <8 x half> %b)
; SI-LABEL: test_fmin_legacy_ule_v8f16_fast:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_cvt_f16_f32_e32 v15, v15
; SI-NEXT: v_cvt_f16_f32_e32 v7, v7
-; SI-NEXT: v_cvt_f16_f32_e32 v14, v14
+; SI-NEXT: v_cvt_f16_f32_e32 v15, v15
; SI-NEXT: v_cvt_f16_f32_e32 v6, v6
-; SI-NEXT: v_cvt_f16_f32_e32 v13, v13
+; SI-NEXT: v_cvt_f16_f32_e32 v14, v14
; SI-NEXT: v_cvt_f16_f32_e32 v5, v5
-; SI-NEXT: v_cvt_f16_f32_e32 v12, v12
+; SI-NEXT: v_cvt_f16_f32_e32 v13, v13
; SI-NEXT: v_cvt_f16_f32_e32 v4, v4
-; SI-NEXT: v_cvt_f16_f32_e32 v11, v11
+; SI-NEXT: v_cvt_f16_f32_e32 v12, v12
; SI-NEXT: v_cvt_f16_f32_e32 v3, v3
-; SI-NEXT: v_cvt_f16_f32_e32 v10, v10
+; SI-NEXT: v_cvt_f16_f32_e32 v11, v11
; SI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; SI-NEXT: v_cvt_f16_f32_e32 v9, v9
+; SI-NEXT: v_cvt_f16_f32_e32 v10, v10
; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; SI-NEXT: v_cvt_f16_f32_e32 v8, v8
+; SI-NEXT: v_cvt_f16_f32_e32 v9, v9
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT: v_cvt_f32_f16_e32 v15, v15
+; SI-NEXT: v_cvt_f16_f32_e32 v8, v8
; SI-NEXT: v_cvt_f32_f16_e32 v7, v7
-; SI-NEXT: v_cvt_f32_f16_e32 v14, v14
+; SI-NEXT: v_cvt_f32_f16_e32 v15, v15
; SI-NEXT: v_cvt_f32_f16_e32 v6, v6
-; SI-NEXT: v_cvt_f32_f16_e32 v13, v13
+; SI-NEXT: v_cvt_f32_f16_e32 v14, v14
; SI-NEXT: v_cvt_f32_f16_e32 v5, v5
-; SI-NEXT: v_cvt_f32_f16_e32 v12, v12
+; SI-NEXT: v_cvt_f32_f16_e32 v13, v13
; SI-NEXT: v_cvt_f32_f16_e32 v4, v4
-; SI-NEXT: v_cvt_f32_f16_e32 v11, v11
+; SI-NEXT: v_cvt_f32_f16_e32 v12, v12
; SI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; SI-NEXT: v_cvt_f32_f16_e32 v10, v10
+; SI-NEXT: v_cvt_f32_f16_e32 v11, v11
; SI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; SI-NEXT: v_cvt_f32_f16_e32 v9, v9
+; SI-NEXT: v_cvt_f32_f16_e32 v10, v10
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-NEXT: v_cvt_f32_f16_e32 v8, v8
+; SI-NEXT: v_cvt_f32_f16_e32 v9, v9
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-NEXT: v_min_f32_e32 v0, v0, v8
-; SI-NEXT: v_min_f32_e32 v1, v1, v9
-; SI-NEXT: v_min_f32_e32 v2, v2, v10
-; SI-NEXT: v_min_f32_e32 v3, v3, v11
-; SI-NEXT: v_min_f32_e32 v4, v4, v12
-; SI-NEXT: v_min_f32_e32 v5, v5, v13
-; SI-NEXT: v_min_f32_e32 v6, v6, v14
-; SI-NEXT: v_min_f32_e32 v7, v7, v15
+; SI-NEXT: v_cvt_f32_f16_e32 v8, v8
+; SI-NEXT: v_min_legacy_f32_e32 v0, v8, v0
+; SI-NEXT: v_min_legacy_f32_e32 v1, v9, v1
+; SI-NEXT: v_min_legacy_f32_e32 v2, v10, v2
+; SI-NEXT: v_min_legacy_f32_e32 v3, v11, v3
+; SI-NEXT: v_min_legacy_f32_e32 v4, v12, v4
+; SI-NEXT: v_min_legacy_f32_e32 v5, v13, v5
+; SI-NEXT: v_min_legacy_f32_e32 v6, v14, v6
+; SI-NEXT: v_min_legacy_f32_e32 v7, v15, v7
; SI-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_fmin_legacy_ule_v8f16_fast:
diff --git a/llvm/test/CodeGen/AMDGPU/reduction.ll b/llvm/test/CodeGen/AMDGPU/reduction.ll
index 291eccd405b8a..0dd2b0ba5550b 100644
--- a/llvm/test/CodeGen/AMDGPU/reduction.ll
+++ b/llvm/test/CodeGen/AMDGPU/reduction.ll
@@ -619,8 +619,6 @@ define half @reduction_fast_max_pattern_v4f16(<4 x half> %vec4) {
; GFX9-LABEL: reduction_fast_max_pattern_v4f16:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
; GFX9-NEXT: v_pk_max_f16 v0, v0, v1
; GFX9-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -628,21 +626,17 @@ define half @reduction_fast_max_pattern_v4f16(<4 x half> %vec4) {
; VI-LABEL: reduction_fast_max_pattern_v4f16:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT: v_max_f16_e32 v1, v1, v1
-; VI-NEXT: v_max_f16_e32 v0, v0, v0
-; VI-NEXT: v_max_f16_e32 v2, v3, v2
+; VI-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NEXT: v_max_f16_e32 v0, v0, v1
; VI-NEXT: v_max_f16_e32 v0, v0, v2
; VI-NEXT: s_setpc_b64 s[30:31]
entry:
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
%rdx.minmax.cmp = fcmp nnan nsz ogt <4 x half> %vec4, %rdx.shuf
- %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x half> %vec4, <4 x half> %rdx.shuf
+ %rdx.minmax.select = select nnan nsz <4 x i1> %rdx.minmax.cmp, <4 x half> %vec4, <4 x half> %rdx.shuf
%rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%rdx.minmax.cmp2 = fcmp nnan nsz ogt <4 x half> %rdx.minmax.select, %rdx.shuf1
- %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x half> %rdx.minmax.select, <4 x half> %rdx.shuf1
+ %rdx.minmax.select3 = select nnan nsz <4 x i1> %rdx.minmax.cmp2, <4 x half> %rdx.minmax.select, <4 x half> %rdx.shuf1
%res = extractelement <4 x half> %rdx.minmax.select3, i32 0
ret half %res
}
@@ -653,8 +647,6 @@ define half @reduction_fast_min_pattern_v4f16(<4 x half> %vec4) {
; GFX9-LABEL: reduction_fast_min_pattern_v4f16:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
; GFX9-NEXT: v_pk_min_f16 v0, v0, v1
; GFX9-NEXT: v_min_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -662,21 +654,17 @@ define half @reduction_fast_min_pattern_v4f16(<4 x half> %vec4) {
; VI-LABEL: reduction_fast_min_pattern_v4f16:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT: v_max_f16_e32 v1, v1, v1
-; VI-NEXT: v_max_f16_e32 v0, v0, v0
-; VI-NEXT: v_min_f16_e32 v2, v3, v2
+; VI-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NEXT: v_min_f16_e32 v0, v0, v1
; VI-NEXT: v_min_f16_e32 v0, v0, v2
; VI-NEXT: s_setpc_b64 s[30:31]
entry:
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
%rdx.minmax.cmp = fcmp nnan nsz olt <4 x half> %vec4, %rdx.shuf
- %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x half> %vec4, <4 x half> %rdx.shuf
+ %rdx.minmax.select = select nnan nsz <4 x i1> %rdx.minmax.cmp, <4 x half> %vec4, <4 x half> %rdx.shuf
%rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%rdx.minmax.cmp2 = fcmp nnan nsz olt <4 x half> %rdx.minmax.select, %rdx.shuf1
- %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x half> %rdx.minmax.select, <4 x half> %rdx.shuf1
+ %rdx.minmax.select3 = select nnan nsz <4 x i1> %rdx.minmax.cmp2, <4 x half> %rdx.minmax.select, <4 x half> %rdx.shuf1
%res = extractelement <4 x half> %rdx.minmax.select3, i32 0
ret half %res
}
diff --git a/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll
index 09f7e7a926376..9aae8ec448261 100644
--- a/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll
@@ -355,15 +355,17 @@ define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag(<2 x float> %a, <
; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0
+; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v2
-; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v1, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
@@ -373,7 +375,12 @@ define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag(<2 x float> %a, <
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3
+; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2
+; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3
+; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX12-NEXT: s_setpc_b64 s[30:31]
%cmp = fcmp ule <2 x float> %a, %b
%val = select nnan nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
@@ -492,15 +499,17 @@ define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag(<2 x float> %a, <
; GFX7-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0
+; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v2
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v1, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag:
@@ -510,7 +519,12 @@ define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag(<2 x float> %a, <
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3
+; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2
+; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3
+; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX12-NEXT: s_setpc_b64 s[30:31]
%cmp = fcmp uge <2 x float> %a, %b
%val = select nnan nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
@@ -659,10 +673,10 @@ define half @v_test_fmin_legacy_ule_f16_nnan_nsz_flag(half %a, half %b) {
; GFX7-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
@@ -838,10 +852,10 @@ define half @v_test_fmax_legacy_uge_f16_nnan_nsz_flag(half %a, half %b) {
; GFX7-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
@@ -1086,16 +1100,16 @@ define <2 x half> @v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag(<2 x half> %a, <2
; GFX7-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0
+; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag:
@@ -1329,16 +1343,16 @@ define <2 x half> @v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag(<2 x half> %a, <2
; GFX7-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0
+; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag:
@@ -1659,26 +1673,26 @@ define <4 x half> @v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag(<4 x half> %a, <4
; GFX7-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
-; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v4
-; GFX7-NEXT: v_min_f32_e32 v1, v1, v5
-; GFX7-NEXT: v_min_f32_e32 v2, v2, v6
-; GFX7-NEXT: v_min_f32_e32 v3, v3, v7
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-NEXT: v_min_legacy_f32_e32 v0, v4, v0
+; GFX7-NEXT: v_min_legacy_f32_e32 v1, v5, v1
+; GFX7-NEXT: v_min_legacy_f32_e32 v2, v6, v2
+; GFX7-NEXT: v_min_legacy_f32_e32 v3, v7, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag:
@@ -2001,26 +2015,26 @@ define <4 x half> @v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag(<4 x half> %a, <4
; GFX7-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
-; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
-; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
-; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
-; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-NEXT: v_max_legacy_f32_e32 v0, v4, v0
+; GFX7-NEXT: v_max_legacy_f32_e32 v1, v5, v1
+; GFX7-NEXT: v_max_legacy_f32_e32 v2, v6, v2
+; GFX7-NEXT: v_max_legacy_f32_e32 v3, v7, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag:
diff --git a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
index 52fe5ce1a8a5f..b088606cb5225 100644
--- a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
+++ b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
@@ -214,7 +214,7 @@ define half @fp16_vminnm_NNNo(half %a) {
; CHECK-NEXT: .short 0x5040 @ half 34
entry:
%cmp1 = fcmp olt half %a, 12.
- %cond1 = select i1 %cmp1, half %a, half 12.
+ %cond1 = select nnan nsz i1 %cmp1, half %a, half 12.
%cmp2 = fcmp olt half 34., %cond1
%cond2 = select i1 %cmp2, half 34., half %cond1
ret half %cond2
@@ -242,7 +242,7 @@ entry:
%cmp1 = fcmp ogt half %a, 56.
%cond1 = select i1 %cmp1, half 56., half %a
%cmp2 = fcmp ogt half 78., %cond1
- %cond2 = select i1 %cmp2, half %cond1, half 78.
+ %cond2 = select nnan nsz i1 %cmp2, half %cond1, half 78.
ret half %cond2
}
@@ -264,7 +264,7 @@ define half @fp16_vminnm_NNNu(half %b) {
; CHECK-NEXT: .short 0x5040 @ half 34
entry:
%cmp1 = fcmp ult half 12., %b
- %cond1 = select i1 %cmp1, half 12., half %b
+ %cond1 = select nnan nsz i1 %cmp1, half 12., half %b
%cmp2 = fcmp ult half %cond1, 34.
%cond2 = select i1 %cmp2, half %cond1, half 34.
ret half %cond2
@@ -291,7 +291,7 @@ define half @fp16_vminnm_NNNule(half %b) {
entry:
%cmp1 = fcmp ule half 34., %b
- %cond1 = select i1 %cmp1, half 34., half %b
+ %cond1 = select nnan nsz i1 %cmp1, half 34., half %b
%cmp2 = fcmp ule half %cond1, 56.
%cond2 = select i1 %cmp2, half %cond1, half 56.
ret half %cond2
@@ -321,7 +321,7 @@ entry:
%cmp1 = fcmp ugt half 56., %b
%cond1 = select i1 %cmp1, half %b, half 56.
%cmp2 = fcmp ugt half %cond1, 78.
- %cond2 = select i1 %cmp2, half 78., half %cond1
+ %cond2 = select nnan nsz i1 %cmp2, half 78., half %cond1
ret half %cond2
}
@@ -343,7 +343,7 @@ define half @fp16_vmaxnm_NNNo(half %a) {
; CHECK-NEXT: .short 0x5040 @ half 34
entry:
%cmp1 = fcmp ogt half %a, 12.
- %cond1 = select i1 %cmp1, half %a, half 12.
+ %cond1 = select nnan nsz i1 %cmp1, half %a, half 12.
%cmp2 = fcmp ogt half 34., %cond1
%cond2 = select i1 %cmp2, half 34., half %cond1
ret half %cond2
@@ -369,7 +369,7 @@ define half @fp16_vmaxnm_NNNoge(half %a) {
; CHECK-NEXT: .short 0x5300 @ half 56
entry:
%cmp1 = fcmp oge half %a, 34.
- %cond1 = select i1 %cmp1, half %a, half 34.
+ %cond1 = select nnan nsz i1 %cmp1, half %a, half 34.
%cmp2 = fcmp oge half 56., %cond1
%cond2 = select i1 %cmp2, half 56., half %cond1
ret half %cond2
@@ -397,7 +397,7 @@ entry:
%cmp1 = fcmp olt half %a, 56.
%cond1 = select i1 %cmp1, half 56., half %a
%cmp2 = fcmp olt half 78., %cond1
- %cond2 = select i1 %cmp2, half %cond1, half 78.
+ %cond2 = select nnan nsz i1 %cmp2, half %cond1, half 78.
ret half %cond2
}
@@ -423,7 +423,7 @@ entry:
%cmp1 = fcmp ole half %a, 78.
%cond1 = select i1 %cmp1, half 78., half %a
%cmp2 = fcmp ole half 90., %cond1
- %cond2 = select i1 %cmp2, half %cond1, half 90.
+ %cond2 = select nnan nsz i1 %cmp2, half %cond1, half 90.
ret half %cond2
}
@@ -445,7 +445,7 @@ define half @fp16_vmaxnm_NNNu(half %b) {
; CHECK-NEXT: .short 0x5040 @ half 34
entry:
%cmp1 = fcmp ugt half 12., %b
- %cond1 = select i1 %cmp1, half 12., half %b
+ %cond1 = select nnan nsz i1 %cmp1, half 12., half %b
%cmp2 = fcmp ugt half %cond1, 34.
%cond2 = select i1 %cmp2, half %cond1, half 34.
ret half %cond2
@@ -471,7 +471,7 @@ define half @fp16_vmaxnm_NNNuge(half %b) {
; CHECK-NEXT: .short 0x5300 @ half 56
entry:
%cmp1 = fcmp uge half 34., %b
- %cond1 = select i1 %cmp1, half 34., half %b
+ %cond1 = select nnan nsz i1 %cmp1, half 34., half %b
%cmp2 = fcmp uge half %cond1, 56.
%cond2 = select i1 %cmp2, half %cond1, half 56.
ret half %cond2
@@ -494,7 +494,7 @@ define half @fp16_vminmaxnm_neg0(half %a) {
; CHECK-NEXT: .short 0x8000 @ half -0
entry:
%cmp1 = fcmp olt half %a, -0.
- %cond1 = select i1 %cmp1, half %a, half -0.
+ %cond1 = select nnan nsz i1 %cmp1, half %a, half -0.
%cmp2 = fcmp ugt half %cond1, -0.
%cond2 = select i1 %cmp2, half %cond1, half -0.
ret half %cond2
@@ -519,7 +519,7 @@ entry:
%cmp1 = fcmp nsz ole half 0., %a
%cond1 = select nsz i1 %cmp1, half 0., half %a
%cmp2 = fcmp nsz uge half 0., %cond1
- %cond2 = select nsz i1 %cmp2, half 0., half %cond1
+ %cond2 = select nnan nsz i1 %cmp2, half 0., half %cond1
ret half %cond2
}
@@ -540,7 +540,7 @@ define half @fp16_vminmaxnm_e_neg0(half %a) {
; CHECK-NEXT: .short 0x8000 @ half -0
entry:
%cmp1 = fcmp nsz ule half -0., %a
- %cond1 = select nsz i1 %cmp1, half -0., half %a
+ %cond1 = select nnan nsz i1 %cmp1, half -0., half %a
%cmp2 = fcmp nsz oge half -0., %cond1
%cond2 = select i1 %cmp2, half -0., half %cond1
ret half %cond2
diff --git a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-vector.ll b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-vector.ll
index 0b41c738080a6..eb401ae051305 100644
--- a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-vector.ll
+++ b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-vector.ll
@@ -10,7 +10,7 @@ define <4 x half> @test1(<4 x half> %A, <4 x half> %B) {
; CHECK: vmaxnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ogt <4 x half> %A, %B
- %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+ %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
ret <4 x half> %tmp4
}
@@ -19,7 +19,7 @@ define <4 x half> @test2(<4 x half> %A, <4 x half> %B) {
; CHECK: vminnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ogt <4 x half> %A, %B
- %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+ %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
ret <4 x half> %tmp4
}
@@ -28,7 +28,7 @@ define <4 x half> @test3(<4 x half> %A, <4 x half> %B) {
; CHECK: vminnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast oge <4 x half> %A, %B
- %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+ %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
ret <4 x half> %tmp4
}
@@ -37,7 +37,7 @@ define <4 x half> @test4(<4 x half> %A, <4 x half> %B) {
; CHECK: vmaxnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast oge <4 x half> %A, %B
- %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+ %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
ret <4 x half> %tmp4
}
@@ -46,7 +46,7 @@ define <4 x half> @test5(<4 x half> %A, <4 x half> %B) {
; CHECK: vminnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast olt <4 x half> %A, %B
- %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+ %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
ret <4 x half> %tmp4
}
@@ -55,7 +55,7 @@ define <4 x half> @test6(<4 x half> %A, <4 x half> %B) {
; CHECK: vmaxnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast olt <4 x half> %A, %B
- %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+ %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
ret <4 x half> %tmp4
}
@@ -64,7 +64,7 @@ define <4 x half> @test7(<4 x half> %A, <4 x half> %B) {
; CHECK: vminnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ole <4 x half> %A, %B
- %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+ %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
ret <4 x half> %tmp4
}
@@ -73,7 +73,7 @@ define <4 x half> @test8(<4 x half> %A, <4 x half> %B) {
; CHECK: vmaxnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ole <4 x half> %A, %B
- %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+ %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
ret <4 x half> %tmp4
}
@@ -84,7 +84,7 @@ define <4 x half> @test11(<4 x half> %A, <4 x half> %B) {
; CHECK: vmaxnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ugt <4 x half> %A, %B
- %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+ %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
ret <4 x half> %tmp4
}
@@ -93,7 +93,7 @@ define <4 x half> @test12(<4 x half> %A, <4 x half> %B) {
; CHECK: vminnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ugt <4 x half> %A, %B
- %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+ %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
ret <4 x half> %tmp4
}
@@ -102,7 +102,7 @@ define <4 x half> @test13(<4 x half> %A, <4 x half> %B) {
; CHECK: vminnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast uge <4 x half> %A, %B
- %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+ %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
ret <4 x half> %tmp4
}
@@ -111,7 +111,7 @@ define <4 x half> @test14(<4 x half> %A, <4 x half> %B) {
; CHECK: vmaxnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast uge <4 x half> %A, %B
- %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+ %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
ret <4 x half> %tmp4
}
@@ -120,7 +120,7 @@ define <4 x half> @test15(<4 x half> %A, <4 x half> %B) {
; CHECK: vminnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ult <4 x half> %A, %B
- %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+ %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
ret <4 x half> %tmp4
}
@@ -129,7 +129,7 @@ define <4 x half> @test16(<4 x half> %A, <4 x half> %B) {
; CHECK: vmaxnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ult <4 x half> %A, %B
- %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+ %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
ret <4 x half> %tmp4
}
@@ -138,7 +138,7 @@ define <4 x half> @test17(<4 x half> %A, <4 x half> %B) {
; CHECK: vminnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ule <4 x half> %A, %B
- %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+ %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
ret <4 x half> %tmp4
}
@@ -147,7 +147,7 @@ define <4 x half> @test18(<4 x half> %A, <4 x half> %B) {
; CHECK: vmaxnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ule <4 x half> %A, %B
- %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+ %tmp4 = select nnan nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
ret <4 x half> %tmp4
}
@@ -160,7 +160,7 @@ define <8 x half> @test201(<8 x half> %A, <8 x half> %B) {
; CHECK: vmaxnm.f16 q0, q0, q1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ogt <8 x half> %A, %B
- %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
+ %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
ret <8 x half> %tmp4
}
@@ -169,7 +169,7 @@ define <8 x half> @test202(<8 x half> %A, <8 x half> %B) {
; CHECK: vminnm.f16 q0, q0, q1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ogt <8 x half> %A, %B
- %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
+ %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
ret <8 x half> %tmp4
}
@@ -178,7 +178,7 @@ define <8 x half> @test203(<8 x half> %A, <8 x half> %B) {
; CHECK: vmaxnm.f16 q0, q0, q1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast oge <8 x half> %A, %B
- %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
+ %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
ret <8 x half> %tmp4
}
@@ -187,7 +187,7 @@ define <8 x half> @test204(<8 x half> %A, <8 x half> %B) {
; CHECK: vminnm.f16 q0, q0, q1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast oge <8 x half> %A, %B
- %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
+ %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
ret <8 x half> %tmp4
}
@@ -196,7 +196,7 @@ define <8 x half> @test205(<8 x half> %A, <8 x half> %B) {
; CHECK: vminnm.f16 q0, q0, q1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast olt <8 x half> %A, %B
- %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
+ %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
ret <8 x half> %tmp4
}
@@ -205,7 +205,7 @@ define <8 x half> @test206(<8 x half> %A, <8 x half> %B) {
; CHECK: vmaxnm.f16 q0, q0, q1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast olt <8 x half> %A, %B
- %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
+ %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
ret <8 x half> %tmp4
}
@@ -214,7 +214,7 @@ define <8 x half> @test207(<8 x half> %A, <8 x half> %B) {
; CHECK: vminnm.f16 q0, q0, q1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ole <8 x half> %A, %B
- %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
+ %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
ret <8 x half> %tmp4
}
@@ -223,7 +223,7 @@ define <8 x half> @test208(<8 x half> %A, <8 x half> %B) {
; CHECK: vmaxnm.f16 q0, q0, q1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ole <8 x half> %A, %B
- %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
+ %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
ret <8 x half> %tmp4
}
@@ -234,7 +234,7 @@ define <8 x half> @test209(<8 x half> %A, <8 x half> %B) {
; CHECK: vmaxnm.f16 q0, q0, q1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ugt <8 x half> %A, %B
- %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
+ %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
ret <8 x half> %tmp4
}
@@ -243,7 +243,7 @@ define <8 x half> @test210(<8 x half> %A, <8 x half> %B) {
; CHECK: vminnm.f16 q0, q0, q1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ugt <8 x half> %A, %B
- %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
+ %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
ret <8 x half> %tmp4
}
@@ -252,7 +252,7 @@ define <8 x half> @test211(<8 x half> %A, <8 x half> %B) {
; CHECK: vmaxnm.f16 q0, q0, q1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast uge <8 x half> %A, %B
- %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
+ %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
ret <8 x half> %tmp4
}
@@ -261,7 +261,7 @@ define <8 x half> @test214(<8 x half> %A, <8 x half> %B) {
; CHECK: vminnm.f16 q0, q0, q1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast uge <8 x half> %A, %B
- %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
+ %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
ret <8 x half> %tmp4
}
@@ -270,7 +270,7 @@ define <8 x half> @test215(<8 x half> %A, <8 x half> %B) {
; CHECK: vminnm.f16 q0, q0, q1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ult <8 x half> %A, %B
- %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
+ %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
ret <8 x half> %tmp4
}
@@ -279,7 +279,7 @@ define <8 x half> @test216(<8 x half> %A, <8 x half> %B) {
; CHECK: vmaxnm.f16 q0, q0, q1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ult <8 x half> %A, %B
- %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
+ %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
ret <8 x half> %tmp4
}
@@ -288,7 +288,7 @@ define <8 x half> @test217(<8 x half> %A, <8 x half> %B) {
; CHECK: vminnm.f16 q0, q0, q1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ule <8 x half> %A, %B
- %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
+ %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
ret <8 x half> %tmp4
}
@@ -297,6 +297,6 @@ define <8 x half> @test218(<8 x half> %A, <8 x half> %B) {
; CHECK: vmaxnm.f16 q0, q0, q1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ule <8 x half> %A, %B
- %tmp4 = select nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
+ %tmp4 = select nnan nsz <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
ret <8 x half> %tmp4
}
diff --git a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
index 6706d25ae01d2..65c84619914e2 100644
--- a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
@@ -1387,7 +1387,7 @@ define void @pr65820(ptr %y, <4 x float> %splat) {
entry:
%broadcast.splat = shufflevector <4 x float> %splat, <4 x float> zeroinitializer, <4 x i32> zeroinitializer
%0 = fcmp ogt <4 x float> %broadcast.splat, zeroinitializer
- %1 = select <4 x i1> %0, <4 x float> %broadcast.splat, <4 x float> zeroinitializer
+ %1 = select nnan nsz <4 x i1> %0, <4 x float> %broadcast.splat, <4 x float> zeroinitializer
store <4 x float> %1, ptr %y, align 4
ret void
}
diff --git a/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll b/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
index 5577ab49bb830..ad411c0a261a5 100644
--- a/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
+++ b/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
@@ -175,7 +175,7 @@ define float @fp-armv8_vminnm_NNNo(float %a) {
; CHECK: vminnm.f32
; CHECK-NOT: vminnm.f32
%cmp1 = fcmp olt float %a, 12.
- %cond1 = select nsz i1 %cmp1, float %a, float 12.
+ %cond1 = select nnan nsz i1 %cmp1, float %a, float 12.
%cmp2 = fcmp olt float 34., %cond1
%cond2 = select i1 %cmp2, float 34., float %cond1
ret float %cond2
@@ -186,7 +186,7 @@ define double @fp-armv8_vminnm_NNNole(double %a) {
; CHECK: vminnm.f64
; CHECK-NOT: vminnm.f64
%cmp1 = fcmp ole double %a, 34.
- %cond1 = select nsz i1 %cmp1, double %a, double 34.
+ %cond1 = select nnan nsz i1 %cmp1, double %a, double 34.
%cmp2 = fcmp ole double 56., %cond1
%cond2 = select i1 %cmp2, double 56., double %cond1
ret double %cond2
@@ -199,7 +199,7 @@ define float @fp-armv8_vminnm_NNNo_rev(float %a) {
%cmp1 = fcmp ogt float %a, 56.
%cond1 = select nsz i1 %cmp1, float 56., float %a
%cmp2 = fcmp ogt float 78., %cond1
- %cond2 = select nsz i1 %cmp2, float %cond1, float 78.
+ %cond2 = select nnan nsz i1 %cmp2, float %cond1, float 78.
ret float %cond2
}
@@ -210,7 +210,7 @@ define double @fp-armv8_vminnm_NNNoge_rev(double %a) {
%cmp1 = fcmp oge double %a, 78.
%cond1 = select nsz i1 %cmp1, double 78., double %a
%cmp2 = fcmp oge double 90., %cond1
- %cond2 = select nsz i1 %cmp2, double %cond1, double 90.
+ %cond2 = select nnan nsz i1 %cmp2, double %cond1, double 90.
ret double %cond2
}
@@ -221,7 +221,7 @@ define float @fp-armv8_vminnm_NNNu(float %b) {
%cmp1 = fcmp ult float 12., %b
%cond1 = select nsz i1 %cmp1, float 12., float %b
%cmp2 = fcmp ult float %cond1, 34.
- %cond2 = select i1 %cmp2, float %cond1, float 34.
+ %cond2 = select nnan nsz i1 %cmp2, float %cond1, float 34.
ret float %cond2
}
@@ -230,7 +230,7 @@ define float @fp-armv8_vminnm_NNNule(float %b) {
; CHECK: vminnm.f32
; CHECK-NOT: vminnm.f32
%cmp1 = fcmp ule float 34., %b
- %cond1 = select nsz i1 %cmp1, float 34., float %b
+ %cond1 = select nnan nsz i1 %cmp1, float 34., float %b
%cmp2 = fcmp ule float %cond1, 56.
%cond2 = select i1 %cmp2, float %cond1, float 56.
ret float %cond2
@@ -243,7 +243,7 @@ define float @fp-armv8_vminnm_NNNu_rev(float %b) {
%cmp1 = fcmp ugt float 56., %b
%cond1 = select nsz i1 %cmp1, float %b, float 56.
%cmp2 = fcmp ugt float %cond1, 78.
- %cond2 = select nsz i1 %cmp2, float 78., float %cond1
+ %cond2 = select nnan nsz i1 %cmp2, float 78., float %cond1
ret float %cond2
}
@@ -254,7 +254,7 @@ define double @fp-armv8_vminnm_NNNuge_rev(double %b) {
%cmp1 = fcmp uge double 78., %b
%cond1 = select nsz i1 %cmp1, double %b, double 78.
%cmp2 = fcmp uge double %cond1, 90.
- %cond2 = select nsz i1 %cmp2, double 90., double %cond1
+ %cond2 = select nnan nsz i1 %cmp2, double 90., double %cond1
ret double %cond2
}
@@ -265,7 +265,7 @@ define float @fp-armv8_vmaxnm_NNNo(float %a) {
%cmp1 = fcmp ogt float %a, 12.
%cond1 = select nsz i1 %cmp1, float %a, float 12.
%cmp2 = fcmp ogt float 34., %cond1
- %cond2 = select i1 %cmp2, float 34., float %cond1
+ %cond2 = select nnan nsz i1 %cmp2, float 34., float %cond1
ret float %cond2
}
@@ -274,7 +274,7 @@ define float @fp-armv8_vmaxnm_NNNoge(float %a) {
; CHECK: vmaxnm.f32
; CHECK-NOT: vmaxnm.f32
%cmp1 = fcmp oge float %a, 34.
- %cond1 = select nsz i1 %cmp1, float %a, float 34.
+ %cond1 = select nnan nsz i1 %cmp1, float %a, float 34.
%cmp2 = fcmp oge float 56., %cond1
%cond2 = select i1 %cmp2, float 56., float %cond1
ret float %cond2
@@ -287,7 +287,7 @@ define float @fp-armv8_vmaxnm_NNNo_rev(float %a) {
%cmp1 = fcmp olt float %a, 56.
%cond1 = select nsz i1 %cmp1, float 56., float %a
%cmp2 = fcmp olt float 78., %cond1
- %cond2 = select nsz i1 %cmp2, float %cond1, float 78.
+ %cond2 = select nnan nsz i1 %cmp2, float %cond1, float 78.
ret float %cond2
}
@@ -298,7 +298,7 @@ define float @fp-armv8_vmaxnm_NNNole_rev(float %a) {
%cmp1 = fcmp ole float %a, 78.
%cond1 = select nsz i1 %cmp1, float 78., float %a
%cmp2 = fcmp ole float 90., %cond1
- %cond2 = select nsz i1 %cmp2, float %cond1, float 90.
+ %cond2 = select nnan nsz i1 %cmp2, float %cond1, float 90.
ret float %cond2
}
@@ -307,7 +307,7 @@ define float @fp-armv8_vmaxnm_NNNu(float %b) {
; CHECK: vmaxnm.f32
; CHECK-NOT: vmaxnm.f32
%cmp1 = fcmp ugt float 12., %b
- %cond1 = select nsz i1 %cmp1, float 12., float %b
+ %cond1 = select nnan nsz i1 %cmp1, float 12., float %b
%cmp2 = fcmp ugt float %cond1, 34.
%cond2 = select i1 %cmp2, float %cond1, float 34.
ret float %cond2
@@ -318,7 +318,7 @@ define float @fp-armv8_vmaxnm_NNNuge(float %b) {
; CHECK: vmaxnm.f32
; CHECK-NOT: vmaxnm.f32
%cmp1 = fcmp uge float 34., %b
- %cond1 = select nsz i1 %cmp1, float 34., float %b
+ %cond1 = select nnan nsz i1 %cmp1, float 34., float %b
%cmp2 = fcmp uge float %cond1, 56.
%cond2 = select i1 %cmp2, float %cond1, float 56.
ret float %cond2
@@ -331,7 +331,7 @@ define float @fp-armv8_vmaxnm_NNNu_rev(float %b) {
%cmp1 = fcmp ult float 56., %b
%cond1 = select nsz i1 %cmp1, float %b, float 56.
%cmp2 = fcmp ult float %cond1, 78.
- %cond2 = select nsz i1 %cmp2, float 78., float %cond1
+ %cond2 = select nnan nsz i1 %cmp2, float 78., float %cond1
ret float %cond2
}
@@ -342,7 +342,7 @@ define double @fp-armv8_vmaxnm_NNNule_rev( double %b) {
%cmp1 = fcmp ule double 78., %b
%cond1 = select nsz i1 %cmp1, double %b, double 78.
%cmp2 = fcmp ule double %cond1, 90.
- %cond2 = select nsz i1 %cmp2, double 90., double %cond1
+ %cond2 = select nnan nsz i1 %cmp2, double 90., double %cond1
ret double %cond2
}
@@ -353,7 +353,7 @@ define float @fp-armv8_vminmaxnm_0(float %a) {
%cmp1 = fcmp ult float %a, 0.
%cond1 = select nsz i1 %cmp1, float %a, float 0.
%cmp2 = fcmp ogt float %cond1, 0.
- %cond2 = select nsz i1 %cmp2, float %cond1, float 0.
+ %cond2 = select nnan nsz i1 %cmp2, float %cond1, float 0.
ret float %cond2
}
@@ -362,7 +362,7 @@ define float @fp-armv8_vminmaxnm_neg0(float %a) {
; CHECK: vminnm.f32
; CHECK-NOT: vmaxnm.f32
%cmp1 = fcmp olt float %a, -0.
- %cond1 = select nsz i1 %cmp1, float %a, float -0.
+ %cond1 = select nnan nsz i1 %cmp1, float %a, float -0.
%cmp2 = fcmp ugt float %cond1, -0.
%cond2 = select i1 %cmp2, float %cond1, float -0.
ret float %cond2
@@ -375,7 +375,7 @@ define float @fp-armv8_vminmaxnm_e_0(float %a) {
%cmp1 = fcmp nsz ole float 0., %a
%cond1 = select nsz i1 %cmp1, float 0., float %a
%cmp2 = fcmp nsz uge float 0., %cond1
- %cond2 = select nsz i1 %cmp2, float 0., float %cond1
+ %cond2 = select nnan nsz i1 %cmp2, float 0., float %cond1
ret float %cond2
}
@@ -384,7 +384,7 @@ define float @fp-armv8_vminmaxnm_e_neg0(float %a) {
; CHECK: vminnm.f32
; CHECK-NOT: vmaxnm.f32
%cmp1 = fcmp nsz ule float -0., %a
- %cond1 = select nsz i1 %cmp1, float -0., float %a
+ %cond1 = select nnan nsz i1 %cmp1, float -0., float %a
%cmp2 = fcmp nsz oge float -0., %cond1
%cond2 = select i1 %cmp2, float -0., float %cond1
ret float %cond2
diff --git a/llvm/test/CodeGen/PowerPC/vec-min-max.ll b/llvm/test/CodeGen/PowerPC/vec-min-max.ll
index 8124fde2667dd..c816e79f0c26f 100644
--- a/llvm/test/CodeGen/PowerPC/vec-min-max.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-min-max.ll
@@ -79,7 +79,7 @@ define <4 x float> @getsmaxf32(<4 x float> %a, <4 x float> %b) {
; CHECK-NEXT: blr
entry:
%0 = fcmp nnan nsz oge <4 x float> %a, %b
- %1 = select nsz <4 x i1> %0, <4 x float> %a, <4 x float> %b
+ %1 = select nnan nsz <4 x i1> %0, <4 x float> %a, <4 x float> %b
ret <4 x float> %1
}
@@ -90,7 +90,7 @@ define <2 x double> @getsmaxf64(<2 x double> %a, <2 x double> %b) {
; CHECK-NEXT: blr
entry:
%0 = fcmp nnan nsz oge <2 x double> %a, %b
- %1 = select nsz <2 x i1> %0, <2 x double> %a, <2 x double> %b
+ %1 = select nnan nsz <2 x i1> %0, <2 x double> %a, <2 x double> %b
ret <2 x double> %1
}
@@ -171,7 +171,7 @@ define <4 x float> @getsminf32(<4 x float> %a, <4 x float> %b) {
; CHECK-NEXT: blr
entry:
%0 = fcmp nnan nsz ole <4 x float> %a, %b
- %1 = select nsz <4 x i1> %0, <4 x float> %a, <4 x float> %b
+ %1 = select nnan nsz <4 x i1> %0, <4 x float> %a, <4 x float> %b
ret <4 x float> %1
}
@@ -182,7 +182,7 @@ define <2 x double> @getsminf64(<2 x double> %a, <2 x double> %b) {
; CHECK-NEXT: blr
entry:
%0 = fcmp nnan nsz ole <2 x double> %a, %b
- %1 = select nsz <2 x i1> %0, <2 x double> %a, <2 x double> %b
+ %1 = select nnan nsz <2 x i1> %0, <2 x double> %a, <2 x double> %b
ret <2 x double> %1
}
diff --git a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll
index 62e3d1d1c34e5..abce82087c5ba 100644
--- a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll
@@ -497,12 +497,20 @@ define float @select_fcmp_olt_pos_zero(float %x) {
; CHECK-LABEL: select_fcmp_olt_pos_zero:
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.w.x fa5, zero
-; CHECK-NEXT: fmin.s fa0, fa0, fa5
+; CHECK-NEXT: flt.s a0, fa0, fa5
+; CHECK-NEXT: bnez a0, .LBB21_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: fmv.s fa0, fa5
+; CHECK-NEXT: .LBB21_2:
; CHECK-NEXT: ret
;
; CHECKZFINX-LABEL: select_fcmp_olt_pos_zero:
; CHECKZFINX: # %bb.0:
-; CHECKZFINX-NEXT: fmin.s a0, a0, zero
+; CHECKZFINX-NEXT: flt.s a1, a0, zero
+; CHECKZFINX-NEXT: bnez a1, .LBB21_2
+; CHECKZFINX-NEXT: # %bb.1:
+; CHECKZFINX-NEXT: li a0, 0
+; CHECKZFINX-NEXT: .LBB21_2:
; CHECKZFINX-NEXT: ret
%cmp = fcmp olt float %x, 0.000000
%sel = select i1 %cmp, float %x, float 0.000000
diff --git a/llvm/test/CodeGen/SystemZ/vec-max-05.ll b/llvm/test/CodeGen/SystemZ/vec-max-05.ll
index b1b0679306a90..d28d0d6685c8a 100644
--- a/llvm/test/CodeGen/SystemZ/vec-max-05.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-max-05.ll
@@ -64,7 +64,7 @@ define double @f4(double %dummy, double %val) {
; CHECK-NEXT: wfmaxdb %f0, %f2, %f0, 4
; CHECK-NEXT: br %r14
%cmp = fcmp ogt double %val, 0.0
- %ret = select i1 %cmp, double %val, double 0.0
+ %ret = select nnan nsz i1 %cmp, double %val, double 0.0
ret double %ret
}
@@ -175,7 +175,7 @@ define float @f14(float %dummy, float %val) {
; CHECK-NEXT: wfmaxsb %f0, %f2, %f0, 4
; CHECK-NEXT: br %r14
%cmp = fcmp ogt float %val, 0.0
- %ret = select i1 %cmp, float %val, float 0.0
+ %ret = select nnan nsz i1 %cmp, float %val, float 0.0
ret float %ret
}
@@ -285,7 +285,7 @@ define void @f24(ptr %ptr, ptr %dst) {
; CHECK-NEXT: br %r14
%val = load fp128, ptr %ptr
%cmp = fcmp ogt fp128 %val, 0xL00000000000000000000000000000000
- %res = select i1 %cmp, fp128 %val, fp128 0xL00000000000000000000000000000000
+ %res = select nnan nsz i1 %cmp, fp128 %val, fp128 0xL00000000000000000000000000000000
store fp128 %res, ptr %dst
ret void
}
diff --git a/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll b/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll
index 10f9f28e599f8..03176314f2048 100644
--- a/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll
@@ -11,7 +11,7 @@ define <2 x double> @f1(<2 x double> %val) {
; CHECK-NEXT: vfmaxdb %v24, %v24, %v0, 4
; CHECK-NEXT: br %r14
%cmp = fcmp ogt <2 x double> %val, zeroinitializer
- %ret = select <2 x i1> %cmp, <2 x double> %val, <2 x double> zeroinitializer
+ %ret = select nnan nsz <2 x i1> %cmp, <2 x double> %val, <2 x double> zeroinitializer
ret <2 x double> %ret
}
@@ -22,7 +22,7 @@ define <2 x double> @f2(<2 x double> %val) {
; CHECK-NEXT: vfmindb %v24, %v24, %v0, 4
; CHECK-NEXT: br %r14
%cmp = fcmp olt <2 x double> %val, zeroinitializer
- %ret = select <2 x i1> %cmp, <2 x double> %val, <2 x double> zeroinitializer
+ %ret = select nnan nsz <2 x i1> %cmp, <2 x double> %val, <2 x double> zeroinitializer
ret <2 x double> %ret
}
@@ -33,7 +33,7 @@ define <4 x float> @f3(<4 x float> %val) {
; CHECK-NEXT: vfmaxsb %v24, %v24, %v0, 4
; CHECK-NEXT: br %r14
%cmp = fcmp ogt <4 x float> %val, zeroinitializer
- %ret = select <4 x i1> %cmp, <4 x float> %val, <4 x float> zeroinitializer
+ %ret = select nnan nsz <4 x i1> %cmp, <4 x float> %val, <4 x float> zeroinitializer
ret <4 x float> %ret
}
@@ -44,7 +44,7 @@ define <4 x float> @f4(<4 x float> %val) {
; CHECK-NEXT: vfminsb %v24, %v24, %v0, 4
; CHECK-NEXT: br %r14
%cmp = fcmp olt <4 x float> %val, zeroinitializer
- %ret = select <4 x i1> %cmp, <4 x float> %val, <4 x float> zeroinitializer
+ %ret = select nnan nsz <4 x i1> %cmp, <4 x float> %val, <4 x float> zeroinitializer
ret <4 x float> %ret
}
diff --git a/llvm/test/CodeGen/SystemZ/vec-min-05.ll b/llvm/test/CodeGen/SystemZ/vec-min-05.ll
index dca5fcd60e0ce..89678eb6293c7 100644
--- a/llvm/test/CodeGen/SystemZ/vec-min-05.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-min-05.ll
@@ -64,7 +64,7 @@ define double @f4(double %dummy, double %val) {
; CHECK-NEXT: wfmindb %f0, %f2, %f0, 4
; CHECK-NEXT: br %r14
%cmp = fcmp olt double %val, 0.0
- %ret = select i1 %cmp, double %val, double 0.0
+ %ret = select nnan nsz i1 %cmp, double %val, double 0.0
ret double %ret
}
@@ -175,7 +175,7 @@ define float @f14(float %dummy, float %val) {
; CHECK-NEXT: wfminsb %f0, %f2, %f0, 4
; CHECK-NEXT: br %r14
%cmp = fcmp olt float %val, 0.0
- %ret = select i1 %cmp, float %val, float 0.0
+ %ret = select nnan nsz i1 %cmp, float %val, float 0.0
ret float %ret
}
@@ -285,7 +285,7 @@ define void @f24(ptr %ptr, ptr %dst) {
; CHECK-NEXT: br %r14
%val = load fp128, ptr %ptr
%cmp = fcmp olt fp128 %val, 0xL00000000000000000000000000000000
- %res = select i1 %cmp, fp128 %val, fp128 0xL00000000000000000000000000000000
+ %res = select nnan nsz i1 %cmp, fp128 %val, fp128 0xL00000000000000000000000000000000
store fp128 %res, ptr %dst
ret void
}
diff --git a/llvm/test/CodeGen/Thumb2/mve-minmax.ll b/llvm/test/CodeGen/Thumb2/mve-minmax.ll
index d536e6b72ac9c..7e1454c5f3683 100644
--- a/llvm/test/CodeGen/Thumb2/mve-minmax.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-minmax.ll
@@ -247,10 +247,26 @@ entry:
define arm_aapcs_vfpcc <4 x float> @maxnm_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: maxnm_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vmaxnm.f32 s3, s7, s3
-; CHECK-MVE-NEXT: vmaxnm.f32 s2, s6, s2
-; CHECK-MVE-NEXT: vmaxnm.f32 s1, s5, s1
-; CHECK-MVE-NEXT: vmaxnm.f32 s0, s4, s0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vcmp.f32 s7, s3
+; CHECK-MVE-NEXT: cset r0, gt
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vcmp.f32 s5, s1
+; CHECK-MVE-NEXT: cset r1, gt
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vcmp.f32 s6, s2
+; CHECK-MVE-NEXT: cset r2, gt
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: cset r3, gt
+; CHECK-MVE-NEXT: cmp r3, #0
+; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s6
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s5
+; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s7
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s4
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: maxnm_float32_t:
@@ -259,7 +275,7 @@ define arm_aapcs_vfpcc <4 x float> @maxnm_float32_t(<4 x float> %src1, <4 x floa
; CHECK-MVEFP-NEXT: bx lr
entry:
%cmp = fcmp fast ogt <4 x float> %src2, %src1
- %0 = select <4 x i1> %cmp, <4 x float> %src2, <4 x float> %src1
+ %0 = select nnan nsz <4 x i1> %cmp, <4 x float> %src2, <4 x float> %src1
ret <4 x float> %0
}
@@ -268,23 +284,55 @@ define arm_aapcs_vfpcc <8 x half> @minnm_float16_t(<8 x half> %src1, <8 x half>
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vmovx.f16 s8, s0
; CHECK-MVE-NEXT: vmovx.f16 s10, s4
-; CHECK-MVE-NEXT: vminnm.f16 s0, s4, s0
-; CHECK-MVE-NEXT: vminnm.f16 s8, s10, s8
-; CHECK-MVE-NEXT: vins.f16 s0, s8
+; CHECK-MVE-NEXT: vcmp.f16 s10, s8
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vcmp.f16 s4, s0
+; CHECK-MVE-NEXT: cset r0, gt
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vseleq.f16 s8, s10, s8
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: cset r0, gt
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vseleq.f16 s0, s4, s0
; CHECK-MVE-NEXT: vmovx.f16 s4, s1
+; CHECK-MVE-NEXT: vins.f16 s0, s8
; CHECK-MVE-NEXT: vmovx.f16 s8, s5
-; CHECK-MVE-NEXT: vminnm.f16 s1, s5, s1
-; CHECK-MVE-NEXT: vminnm.f16 s4, s8, s4
+; CHECK-MVE-NEXT: vcmp.f16 s8, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vcmp.f16 s5, s1
+; CHECK-MVE-NEXT: cset r0, gt
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmovx.f16 s8, s6
+; CHECK-MVE-NEXT: cset r0, gt
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vseleq.f16 s1, s5, s1
; CHECK-MVE-NEXT: vins.f16 s1, s4
; CHECK-MVE-NEXT: vmovx.f16 s4, s2
-; CHECK-MVE-NEXT: vminnm.f16 s2, s6, s2
-; CHECK-MVE-NEXT: vminnm.f16 s4, s8, s4
+; CHECK-MVE-NEXT: vcmp.f16 s8, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vcmp.f16 s6, s2
+; CHECK-MVE-NEXT: cset r0, gt
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: cset r0, gt
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vseleq.f16 s2, s6, s2
+; CHECK-MVE-NEXT: vmovx.f16 s6, s7
; CHECK-MVE-NEXT: vins.f16 s2, s4
; CHECK-MVE-NEXT: vmovx.f16 s4, s3
-; CHECK-MVE-NEXT: vmovx.f16 s6, s7
-; CHECK-MVE-NEXT: vminnm.f16 s3, s7, s3
-; CHECK-MVE-NEXT: vminnm.f16 s4, s6, s4
+; CHECK-MVE-NEXT: vcmp.f16 s6, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vcmp.f16 s7, s3
+; CHECK-MVE-NEXT: cset r0, gt
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vseleq.f16 s4, s6, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: cset r0, gt
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vseleq.f16 s3, s7, s3
; CHECK-MVE-NEXT: vins.f16 s3, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -294,7 +342,7 @@ define arm_aapcs_vfpcc <8 x half> @minnm_float16_t(<8 x half> %src1, <8 x half>
; CHECK-MVEFP-NEXT: bx lr
entry:
%cmp = fcmp fast ogt <8 x half> %src2, %src1
- %0 = select <8 x i1> %cmp, <8 x half> %src1, <8 x half> %src2
+ %0 = select nnan nsz <8 x i1> %cmp, <8 x half> %src1, <8 x half> %src2
ret <8 x half> %0
}
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-selectop.ll b/llvm/test/CodeGen/Thumb2/mve-pred-selectop.ll
index eeb1d0d1e7dbc..751a5dcbda177 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-selectop.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-selectop.ll
@@ -747,7 +747,7 @@ define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32(<4 x float> %z, <4 x flo
entry:
%c = fcmp oeq <4 x float> %z, zeroinitializer
%a1 = fcmp fast olt <4 x float> %x, %y
- %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
+ %a = select nnan nsz <4 x i1> %a1, <4 x float> %x, <4 x float> %y
%b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
ret <4 x float> %b
}
@@ -761,7 +761,7 @@ define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16(<8 x half> %z, <8 x half>
entry:
%c = fcmp oeq <8 x half> %z, zeroinitializer
%a1 = fcmp fast olt <8 x half> %x, %y
- %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
+ %a = select nnan nsz <8 x i1> %a1, <8 x half> %x, <8 x half> %y
%b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
ret <8 x half> %b
}
@@ -775,7 +775,7 @@ define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32(<4 x float> %z, <4 x flo
entry:
%c = fcmp oeq <4 x float> %z, zeroinitializer
%a1 = fcmp fast ogt <4 x float> %x, %y
- %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
+ %a = select nnan nsz <4 x i1> %a1, <4 x float> %x, <4 x float> %y
%b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
ret <4 x float> %b
}
@@ -789,7 +789,7 @@ define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16(<8 x half> %z, <8 x half>
entry:
%c = fcmp oeq <8 x half> %z, zeroinitializer
%a1 = fcmp fast ogt <8 x half> %x, %y
- %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
+ %a = select nnan nsz <8 x i1> %a1, <8 x half> %x, <8 x half> %y
%b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
ret <8 x half> %b
}
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-selectop2.ll b/llvm/test/CodeGen/Thumb2/mve-pred-selectop2.ll
index de7af894bd4fb..e0350ff2fdf0e 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-selectop2.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-selectop2.ll
@@ -859,7 +859,7 @@ define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_x(<4 x float> %x, <4 x f
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
%a1 = fcmp fast olt <4 x float> %x, %y
- %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
+ %a = select nnan nsz <4 x i1> %a1, <4 x float> %x, <4 x float> %y
%b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
ret <4 x float> %b
}
@@ -874,7 +874,7 @@ define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_x(<8 x half> %x, <8 x hal
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
%a1 = fcmp fast olt <8 x half> %x, %y
- %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
+ %a = select nnan nsz <8 x i1> %a1, <8 x half> %x, <8 x half> %y
%b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
ret <8 x half> %b
}
@@ -889,7 +889,7 @@ define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_x(<4 x float> %x, <4 x f
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
%a1 = fcmp fast ogt <4 x float> %x, %y
- %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
+ %a = select nnan nsz <4 x i1> %a1, <4 x float> %x, <4 x float> %y
%b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
ret <4 x float> %b
}
@@ -904,7 +904,7 @@ define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_x(<8 x half> %x, <8 x hal
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
%a1 = fcmp fast ogt <8 x half> %x, %y
- %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
+ %a = select nnan nsz <8 x i1> %a1, <8 x half> %x, <8 x half> %y
%b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
ret <8 x half> %b
}
@@ -2435,7 +2435,7 @@ define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_y(<4 x float> %x, <4 x f
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
%a1 = fcmp fast olt <4 x float> %x, %y
- %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
+ %a = select nnan nsz <4 x i1> %a1, <4 x float> %x, <4 x float> %y
%b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
ret <4 x float> %b
}
@@ -2451,7 +2451,7 @@ define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_y(<8 x half> %x, <8 x hal
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
%a1 = fcmp fast olt <8 x half> %x, %y
- %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
+ %a = select nnan nsz <8 x i1> %a1, <8 x half> %x, <8 x half> %y
%b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
ret <8 x half> %b
}
@@ -2467,7 +2467,7 @@ define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_y(<4 x float> %x, <4 x f
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
%a1 = fcmp fast ogt <4 x float> %x, %y
- %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
+ %a = select nnan nsz <4 x i1> %a1, <4 x float> %x, <4 x float> %y
%b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
ret <4 x float> %b
}
@@ -2483,7 +2483,7 @@ define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_y(<8 x half> %x, <8 x hal
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
%a1 = fcmp fast ogt <8 x half> %x, %y
- %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
+ %a = select nnan nsz <8 x i1> %a1, <8 x half> %x, <8 x half> %y
%b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
ret <8 x half> %b
}
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-selectop3.ll b/llvm/test/CodeGen/Thumb2/mve-pred-selectop3.ll
index 080c6c1a1efdc..e45a6281abb27 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-selectop3.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-selectop3.ll
@@ -913,7 +913,7 @@ define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_x(<4 x float> %x, <4 x f
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
%a1 = fcmp fast olt <4 x float> %x, %y
- %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
+ %a = select nnan nsz <4 x i1> %a1, <4 x float> %x, <4 x float> %y
%b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
ret <4 x float> %b
}
@@ -928,7 +928,7 @@ define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_x(<8 x half> %x, <8 x hal
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
%a1 = fcmp fast olt <8 x half> %x, %y
- %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
+ %a = select nnan nsz <8 x i1> %a1, <8 x half> %x, <8 x half> %y
%b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
ret <8 x half> %b
}
@@ -943,7 +943,7 @@ define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_x(<4 x float> %x, <4 x f
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
%a1 = fcmp fast ogt <4 x float> %x, %y
- %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
+ %a = select nnan nsz <4 x i1> %a1, <4 x float> %x, <4 x float> %y
%b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
ret <4 x float> %b
}
@@ -958,7 +958,7 @@ define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_x(<8 x half> %x, <8 x hal
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
%a1 = fcmp fast ogt <8 x half> %x, %y
- %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
+ %a = select nnan nsz <8 x i1> %a1, <8 x half> %x, <8 x half> %y
%b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
ret <8 x half> %b
}
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
index be737961e3ae7..26440f5087780 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
@@ -372,7 +372,7 @@ define arm_aapcs_vfpcc float @fmin_v2f32_acc(<2 x float> %x, float %y) {
entry:
%z = call fast float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
%c = fcmp fast olt float %y, %z
- %r = select i1 %c, float %y, float %z
+ %r = select nnan nsz i1 %c, float %y, float %z
ret float %r
}
@@ -395,7 +395,7 @@ define arm_aapcs_vfpcc float @fmin_v4f32_acc(<4 x float> %x, float %y) {
entry:
%z = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
%c = fcmp fast olt float %y, %z
- %r = select i1 %c, float %y, float %z
+ %r = select nnan nsz i1 %c, float %y, float %z
ret float %r
}
@@ -423,7 +423,7 @@ define arm_aapcs_vfpcc float @fmin_v8f32_acc(<8 x float> %x, float %y) {
entry:
%z = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
%c = fcmp fast olt float %y, %z
- %r = select i1 %c, float %y, float %z
+ %r = select nnan nsz i1 %c, float %y, float %z
ret float %r
}
@@ -450,7 +450,7 @@ define arm_aapcs_vfpcc half @fmin_v4f16_acc(<4 x half> %x, half %y) {
entry:
%z = call fast half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
%c = fcmp fast olt half %y, %z
- %r = select i1 %c, half %y, half %z
+ %r = select nnan nsz i1 %c, half %y, half %z
ret half %r
}
@@ -464,7 +464,7 @@ define arm_aapcs_vfpcc half @fmin_v2f16_acc(<2 x half> %x, half %y) {
entry:
%z = call fast half @llvm.vector.reduce.fmin.v2f16(<2 x half> %x)
%c = fcmp fast olt half %y, %z
- %r = select i1 %c, half %y, half %z
+ %r = select nnan nsz i1 %c, half %y, half %z
ret half %r
}
@@ -497,7 +497,7 @@ define arm_aapcs_vfpcc half @fmin_v8f16_acc(<8 x half> %x, half %y) {
entry:
%z = call fast half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
%c = fcmp fast olt half %y, %z
- %r = select i1 %c, half %y, half %z
+ %r = select nnan nsz i1 %c, half %y, half %z
ret half %r
}
@@ -543,7 +543,7 @@ define arm_aapcs_vfpcc half @fmin_v16f16_acc(<16 x half> %x, half %y) {
entry:
%z = call fast half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
%c = fcmp fast olt half %y, %z
- %r = select i1 %c, half %y, half %z
+ %r = select nnan nsz i1 %c, half %y, half %z
ret half %r
}
@@ -555,7 +555,7 @@ define arm_aapcs_vfpcc double @fmin_v1f64_acc(<1 x double> %x, double %y) {
entry:
%z = call fast double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
%c = fcmp fast olt double %y, %z
- %r = select i1 %c, double %y, double %z
+ %r = select nnan nsz i1 %c, double %y, double %z
ret double %r
}
@@ -568,7 +568,7 @@ define arm_aapcs_vfpcc double @fmin_v2f64_acc(<2 x double> %x, double %y) {
entry:
%z = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
%c = fcmp fast olt double %y, %z
- %r = select i1 %c, double %y, double %z
+ %r = select nnan nsz i1 %c, double %y, double %z
ret double %r
}
@@ -587,7 +587,7 @@ define arm_aapcs_vfpcc double @fmin_v4f64_acc(<4 x double> %x, double %y) {
entry:
%z = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
%c = fcmp fast olt double %y, %z
- %r = select i1 %c, double %y, double %z
+ %r = select nnan nsz i1 %c, double %y, double %z
ret double %r
}
@@ -1198,7 +1198,7 @@ define arm_aapcs_vfpcc float @fmax_v2f32_acc(<2 x float> %x, float %y) {
entry:
%z = call fast float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
%c = fcmp fast ogt float %y, %z
- %r = select i1 %c, float %y, float %z
+ %r = select nnan nsz i1 %c, float %y, float %z
ret float %r
}
@@ -1221,7 +1221,7 @@ define arm_aapcs_vfpcc float @fmax_v4f32_acc(<4 x float> %x, float %y) {
entry:
%z = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x)
%c = fcmp fast ogt float %y, %z
- %r = select i1 %c, float %y, float %z
+ %r = select nnan nsz i1 %c, float %y, float %z
ret float %r
}
@@ -1249,7 +1249,7 @@ define arm_aapcs_vfpcc float @fmax_v8f32_acc(<8 x float> %x, float %y) {
entry:
%z = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
%c = fcmp fast ogt float %y, %z
- %r = select i1 %c, float %y, float %z
+ %r = select nnan nsz i1 %c, float %y, float %z
ret float %r
}
@@ -1263,7 +1263,7 @@ define arm_aapcs_vfpcc half @fmax_v2f16_acc(<2 x half> %x, half %y) {
entry:
%z = call fast half @llvm.vector.reduce.fmax.v2f16(<2 x half> %x)
%c = fcmp fast ogt half %y, %z
- %r = select i1 %c, half %y, half %z
+ %r = select nnan nsz i1 %c, half %y, half %z
ret half %r
}
@@ -1290,7 +1290,7 @@ define arm_aapcs_vfpcc half @fmax_v4f16_acc(<4 x half> %x, half %y) {
entry:
%z = call fast half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
%c = fcmp fast ogt half %y, %z
- %r = select i1 %c, half %y, half %z
+ %r = select nnan nsz i1 %c, half %y, half %z
ret half %r
}
@@ -1323,7 +1323,7 @@ define arm_aapcs_vfpcc half @fmax_v8f16_acc(<8 x half> %x, half %y) {
entry:
%z = call fast half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
%c = fcmp fast ogt half %y, %z
- %r = select i1 %c, half %y, half %z
+ %r = select nnan nsz i1 %c, half %y, half %z
ret half %r
}
@@ -1369,7 +1369,7 @@ define arm_aapcs_vfpcc half @fmax_v16f16_acc(<16 x half> %x, half %y) {
entry:
%z = call fast half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
%c = fcmp fast ogt half %y, %z
- %r = select i1 %c, half %y, half %z
+ %r = select nnan nsz i1 %c, half %y, half %z
ret half %r
}
@@ -1381,7 +1381,7 @@ define arm_aapcs_vfpcc double @fmax_v1f64_acc(<1 x double> %x, double %y) {
entry:
%z = call fast double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x)
%c = fcmp fast ogt double %y, %z
- %r = select i1 %c, double %y, double %z
+ %r = select nnan nsz i1 %c, double %y, double %z
ret double %r
}
@@ -1394,7 +1394,7 @@ define arm_aapcs_vfpcc double @fmax_v2f64_acc(<2 x double> %x, double %y) {
entry:
%z = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x)
%c = fcmp fast ogt double %y, %z
- %r = select i1 %c, double %y, double %z
+ %r = select nnan nsz i1 %c, double %y, double %z
ret double %r
}
@@ -1413,7 +1413,7 @@ define arm_aapcs_vfpcc double @fmax_v4f64_acc(<4 x double> %x, double %y) {
entry:
%z = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x)
%c = fcmp fast ogt double %y, %z
- %r = select i1 %c, double %y, double %z
+ %r = select nnan nsz i1 %c, double %y, double %z
ret double %r
}
>From 1f0394697659f6a6fd66c59b4209c0e0a3b1c6bb Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 19 Jan 2026 04:43:30 +0800
Subject: [PATCH 4/4] [AMDGPU] Fix AMDGPU tests. NFC.
---
llvm/test/CodeGen/AMDGPU/fmax_legacy.ll | 18 +++++++++++-----
llvm/test/CodeGen/AMDGPU/fmin_legacy.ll | 28 +++++++++++++++++--------
2 files changed, 32 insertions(+), 14 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll b/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll
index f3a84e6e45260..b7830d13ce8e9 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll
@@ -265,7 +265,9 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(ptr addrspace(1) %out, ptr
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
-; GCN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
+; GCN: v_mul_f32_e32 [[CA:v[0-9]+]], 1.0, [[A]]
+; GCN: v_mul_f32_e32 [[CB:v[0-9]+]], 1.0, [[B]]
+; GCN: v_max_f32_e32 {{v[0-9]+}}, [[CA]], [[CB]]
; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32_fast(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
@@ -312,11 +314,17 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(ptr addrspace(1) %out, ptr
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v3f32_fast:
-; GCN: v_max_f32_e32
-; GCN: v_max_f32_e32
-; GCN: v_max_f32_e32
+; VI: v_cmp_gt_f32_e32
+; VI: v_cndmask_b32_e32
+; VI: v_cmp_gt_f32_e32
+; VI: v_cndmask_b32_e32
+; VI: v_cmp_gt_f32_e32
+; VI: v_cndmask_b32_e32
-; GCN-NOT: v_max
+; SI: v_max_legacy_f32_e32
+; SI: v_max_legacy_f32_e32
+; SI: v_max_legacy_f32_e32
+; SI-NOT: v_max_
define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32_fast(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr <3 x float>, ptr addrspace(1) %in, i32 %tid
diff --git a/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll b/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
index 39eefa1879870..146864314b301 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
@@ -306,7 +306,9 @@ define amdgpu_kernel void @test_fmin_legacy_ult_v1f32(ptr addrspace(1) %out, ptr
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
-; GCN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
+; GCN: v_mul_f32_e32 [[CA:v[0-9]+]], 1.0, [[A]]
+; GCN: v_mul_f32_e32 [[CB:v[0-9]+]], 1.0, [[B]]
+; GCN: v_min_f32_e32 {{v[0-9]+}}, [[CA]], [[CB]]
define amdgpu_kernel void @test_fmin_legacy_ult_v1f32_fast(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr <1 x float>, ptr addrspace(1) %in, i32 %tid
@@ -349,8 +351,12 @@ define amdgpu_kernel void @test_fmin_legacy_ult_v2f32(ptr addrspace(1) %out, ptr
; GCN: {{buffer|flat}}_load_dwordx2
; GCN: {{buffer|flat}}_load_dwordx2
-; GCN: v_min_f32_e32
-; GCN: v_min_f32_e32
+; SI: v_min_legacy_f32_e32
+; SI: v_min_legacy_f32_e32
+; VI: v_cmp_nge_f32_e32
+; VI: v_cndmask_b32_e32
+; VI: v_cmp_nge_f32_e32
+; VI: v_cndmask_b32_e32
define amdgpu_kernel void @test_fmin_legacy_ult_v2f32_fast(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr <2 x float>, ptr addrspace(1) %in, i32 %tid
@@ -394,13 +400,17 @@ define amdgpu_kernel void @test_fmin_legacy_ult_v3f32(ptr addrspace(1) %out, ptr
}
; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v3f32_fast:
-; VI-NOT: v_cmp
-; VI-NOT: v_cndmask
+; VI: v_cmp_nge_f32_e32
+; VI: v_cndmask_b32_e32
+; VI: v_cmp_nge_f32_e32
+; VI: v_cndmask_b32_e32
+; VI: v_cmp_nge_f32_e32
+; VI: v_cndmask_b32_e32
-; GCN: v_min_f32_e32
-; GCN: v_min_f32_e32
-; GCN: v_min_f32_e32
-; GCN-NOT: v_min_
+; SI: v_min_legacy_f32_e32
+; SI: v_min_legacy_f32_e32
+; SI: v_min_legacy_f32_e32
+; SI-NOT: v_min_
define amdgpu_kernel void @test_fmin_legacy_ult_v3f32_fast(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr <3 x float>, ptr addrspace(1) %in, i32 %tid
More information about the llvm-commits mailing list