[llvm] [SDAGBuilder] Fix incorrect fcmp+select to minnum/maxnum transform (PR #184590)
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 5 00:18:03 PST 2026
https://github.com/nikic updated https://github.com/llvm/llvm-project/pull/184590
>From c5a844d1b2de244b5bf201642ce4b141094273f3 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Wed, 4 Mar 2026 10:10:39 +0100
Subject: [PATCH 1/2] [SDAGBuilder] Fix incorrect fcmp+select to minnum/maxnum
transform
minnum/maxnum don't have the correct sNaN semantics; we must
convert to minimumnum/maximumnum instead.
To avoid an NVPTX regression, make it handle fmaximumnum in one
TableGen pattern.
---
.../SelectionDAG/SelectionDAGBuilder.cpp | 10 +-
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 10 +-
llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll | 6 +-
llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll | 86 +++++++----
.../CodeGen/ARM/minnum-maxnum-intrinsics.ll | 7 +-
llvm/test/CodeGen/ARM/vminmaxnm-safe.ll | 138 +++++++++++-------
llvm/test/CodeGen/SystemZ/vec-max-05.ll | 16 +-
.../CodeGen/SystemZ/vec-max-min-zerosplat.ll | 12 +-
llvm/test/CodeGen/SystemZ/vec-min-05.ll | 16 +-
.../CodeGen/X86/avx512-broadcast-unfold.ll | 60 +++++---
llvm/test/CodeGen/X86/sse-minmax.ll | 22 +--
11 files changed, 242 insertions(+), 141 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 3e6fdd7bbf9fe..6ed4cd04d06f1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3878,7 +3878,10 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
case SPNB_RETURNS_NAN: break;
- case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
+ case SPNB_RETURNS_OTHER:
+ Opc = ISD::FMINIMUMNUM;
+ Flags.setNoSignedZeros(true);
+ break;
case SPNB_RETURNS_ANY:
if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ||
(UseScalarMinMax &&
@@ -3891,7 +3894,10 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
case SPNB_RETURNS_NAN: break;
- case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
+ case SPNB_RETURNS_OTHER:
+ Opc = ISD::FMAXIMUMNUM;
+ Flags.setNoSignedZeros(true);
+ break;
case SPNB_RETURNS_ANY:
if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) ||
(UseScalarMinMax &&
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 096c5e470ed02..5019d76039a4d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2553,10 +2553,10 @@ def NVPTX_fma_oneuse_and_nnan : PatFrag<(ops node:$a, node:$b, node:$c),
return N->hasOneUse() &&
(N->getFlags().hasNoNaNs() || TM.Options.NoNaNsFPMath);
}]>;
-// fmaxnum will differentiate between signed and unsigned zeros soon, so this
-// PatFrag is for a fmaxnum node with nsz
-def NVPTX_fmaxnum_nsz : PatFrag<(ops node:$a, node:$b),
- (fmaxnum node:$a, node:$b), [{
+// fmaxnum/fmaximumnum differentiate between signed and unsigned zeros, so this
+// PatFrag is for a fmaxnum/fmaximumnum node with nsz
+def NVPTX_fmaxnum_or_fmaximumnum_nsz : PatFrag<(ops node:$a, node:$b),
+ (fmaxnum_or_fmaximumnum node:$a, node:$b), [{
return N->getFlags().hasNoSignedZeros();
}]>;
@@ -2564,7 +2564,7 @@ class FMARELUInst<RegTyInfo t, bit allow_ftz, PatFrag zero_pat>
: BasicFlagsNVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b, t.RC:$c),
!if(allow_ftz, (ins FTZFlag:$ftz), (ins)),
"fma.rn" # !if(allow_ftz, "$ftz", "") # ".relu." # t.PtxType,
- [(set t.Ty:$dst, (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan t.Ty:$a, t.Ty:$b, t.Ty:$c), zero_pat))]>;
+ [(set t.Ty:$dst, (NVPTX_fmaxnum_or_fmaximumnum_nsz (NVPTX_fma_oneuse_and_nnan t.Ty:$a, t.Ty:$b, t.Ty:$c), zero_pat))]>;
let Predicates = [useFP16Math, hasPTX<70>, hasSM<80>] in {
def FMARELU_F16 : FMARELUInst<F16RT, true, fpimm_0>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
index 159075db0f7bc..4e3fac3650e98 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
@@ -31,12 +31,14 @@ define double @test_cross(float %in) {
}
; Same as previous, but with ordered comparison;
-; must become fminnm, not fmin.
+; Should not become either fminnm or fmin, because neither have the correct
+; behavior for sNaN.
define double @test_cross_fail_nan(float %in) {
; CHECK-LABEL: test_cross_fail_nan:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0000000000000000
-; CHECK-NEXT: fminnm s0, s0, s1
+; CHECK-NEXT: fcmp s0, #0.0
+; CHECK-NEXT: fcsel s0, s0, s1, mi
; CHECK-NEXT: fcvt d0, s0
; CHECK-NEXT: ret
%cmp = fcmp olt float %in, 0.000000e+00
diff --git a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
index 2185bd8a2a138..9e4685f2081df 100644
--- a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
+++ b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
@@ -201,7 +201,9 @@ define half @fp16_vminnm_NNNo(half %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI12_0
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -229,7 +231,9 @@ define half @fp16_vminnm_NNNo_rev(half %a) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI13_1
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -251,11 +255,13 @@ define half @fp16_vminnm_NNNu(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
-; CHECK-NEXT: vldr.16 s2, .LCPI14_0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI14_0
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -275,11 +281,13 @@ define half @fp16_vminnm_NNNule(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI15_0
; CHECK-NEXT: vmov.f16 s0, r0
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
-; CHECK-NEXT: vldr.16 s2, .LCPI15_1
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI15_1
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -306,7 +314,9 @@ define half @fp16_vminnm_NNNu_rev(half %b) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI16_1
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -330,7 +340,9 @@ define half @fp16_vmaxnm_NNNo(half %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI17_0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -354,7 +366,9 @@ define half @fp16_vmaxnm_NNNoge(half %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI18_0
; CHECK-NEXT: vmov.f16 s0, r0
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI18_1
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -384,7 +398,9 @@ define half @fp16_vmaxnm_NNNo_rev(half %a) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI19_1
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -410,7 +426,9 @@ define half @fp16_vmaxnm_NNNole_rev(half %a) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI20_1
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -432,11 +450,13 @@ define half @fp16_vmaxnm_NNNu(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
-; CHECK-NEXT: vldr.16 s2, .LCPI21_0
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI21_0
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -456,11 +476,13 @@ define half @fp16_vmaxnm_NNNuge(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI22_0
; CHECK-NEXT: vmov.f16 s0, r0
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
-; CHECK-NEXT: vldr.16 s2, .LCPI22_1
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI22_1
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -480,12 +502,14 @@ entry:
define half @fp16_vminmaxnm_neg0(half %a) {
; CHECK-LABEL: fp16_vminmaxnm_neg0:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldr.16 s0, .LCPI23_0
-; CHECK-NEXT: vmov.f16 s2, r0
-; CHECK-NEXT: vminnm.f16 s2, s2, s0
+; CHECK-NEXT: vldr.16 s2, .LCPI23_0
+; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f16 s0, s2, s0
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -508,7 +532,9 @@ define half @fp16_vminmaxnm_e_0(half %a) {
; CHECK-NEXT: vcmp.f16 s0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -526,12 +552,14 @@ entry:
define half @fp16_vminmaxnm_e_neg0(half %a) {
; CHECK-LABEL: fp16_vminmaxnm_e_neg0:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldr.16 s0, .LCPI25_0
-; CHECK-NEXT: vmov.f16 s2, r0
-; CHECK-NEXT: vminnm.f16 s2, s2, s0
-; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI25_0
+; CHECK-NEXT: vmov.f16 s0, r0
+; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
diff --git a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
index 8564d7d9996d3..01cb3ba404816 100644
--- a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
@@ -1300,9 +1300,9 @@ define void @pr65820(ptr %y, <4 x float> %splat) {
; ARMV8-LABEL: pr65820:
; ARMV8: @ %bb.0: @ %entry
; ARMV8-NEXT: vmov d16, r2, r3
-; ARMV8-NEXT: vmov.i32 q9, #0x0
; ARMV8-NEXT: vdup.32 q8, d16[0]
-; ARMV8-NEXT: vmaxnm.f32 q8, q8, q9
+; ARMV8-NEXT: vcgt.f32 q9, q8, #0
+; ARMV8-NEXT: vand q8, q9, q8
; ARMV8-NEXT: vst1.32 {d16, d17}, [r0]
; ARMV8-NEXT: bx lr
;
@@ -1312,7 +1312,8 @@ define void @pr65820(ptr %y, <4 x float> %splat) {
; ARMV8M-NEXT: vmov r1, s0
; ARMV8M-NEXT: vmov.i32 q0, #0x0
; ARMV8M-NEXT: vdup.32 q1, r1
-; ARMV8M-NEXT: vmaxnm.f32 q0, q1, q0
+; ARMV8M-NEXT: vcmp.f32 gt, q1, zr
+; ARMV8M-NEXT: vdupt.32 q0, r1
; ARMV8M-NEXT: vstrw.32 q0, [r0]
; ARMV8M-NEXT: bx lr
entry:
diff --git a/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll b/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
index 9d0fef6452a38..04c11c35f99e5 100644
--- a/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
+++ b/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
@@ -309,12 +309,14 @@ define float @fp_armv8_vminnm_NNNo(float %a) {
; CHECK-LABEL: fp_armv8_vminnm_NNNo:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT: vldr s2, .LCPI20_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vminnm.f32 s0, s4, s0
+; CHECK-NEXT: vldr s4, .LCPI20_0
+; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -331,13 +333,15 @@ define float @fp_armv8_vminnm_NNNo(float %a) {
define double @fp_armv8_vminnm_NNNole(double %a) {
; CHECK-LABEL: fp_armv8_vminnm_NNNole:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vldr d16, .LCPI21_0
-; CHECK-NEXT: vmov d18, r0, r1
-; CHECK-NEXT: vldr d17, .LCPI21_1
-; CHECK-NEXT: vminnm.f64 d16, d18, d16
-; CHECK-NEXT: vcmp.f64 d16, d17
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vldr d17, .LCPI21_0
+; CHECK-NEXT: vldr d18, .LCPI21_1
+; CHECK-NEXT: vcmp.f64 d17, d16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f64 d16, d17, d16
+; CHECK-NEXT: vselge.f64 d16, d16, d17
+; CHECK-NEXT: vcmp.f64 d16, d18
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d18, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
@@ -364,7 +368,9 @@ define float @fp_armv8_vminnm_NNNo_rev(float %a) {
; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s0, s2
-; CHECK-NEXT: vminnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -389,7 +395,9 @@ define double @fp_armv8_vminnm_NNNoge_rev(double %a) {
; CHECK-NEXT: vcmp.f64 d17, d16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f64 d16, d16, d17
-; CHECK-NEXT: vminnm.f64 d16, d16, d18
+; CHECK-NEXT: vcmp.f64 d18, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d16, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
@@ -411,12 +419,14 @@ define float @fp_armv8_vminnm_NNNu(float %b) {
; CHECK-LABEL: fp_armv8_vminnm_NNNu:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT: vldr s2, .LCPI24_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vminnm.f32 s0, s4, s0
-; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vldr s4, .LCPI24_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f32 s0, s0, s2
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -434,12 +444,14 @@ define float @fp_armv8_vminnm_NNNule(float %b) {
; CHECK-LABEL: fp_armv8_vminnm_NNNule:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr s0, .LCPI25_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vldr s2, .LCPI25_1
-; CHECK-NEXT: vminnm.f32 s0, s4, s0
-; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI25_1
+; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f32 s0, s0, s2
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -464,7 +476,9 @@ define float @fp_armv8_vminnm_NNNu_rev(float %b) {
; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f32 s0, s0, s2
-; CHECK-NEXT: vminnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -489,7 +503,9 @@ define double @fp_armv8_vminnm_NNNuge_rev(double %b) {
; CHECK-NEXT: vcmp.f64 d17, d16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f64 d16, d16, d17
-; CHECK-NEXT: vminnm.f64 d16, d16, d18
+; CHECK-NEXT: vcmp.f64 d18, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f64 d16, d16, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
@@ -511,12 +527,14 @@ define float @fp_armv8_vmaxnm_NNNo(float %a) {
; CHECK-LABEL: fp_armv8_vmaxnm_NNNo:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT: vldr s2, .LCPI28_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vmaxnm.f32 s0, s4, s0
+; CHECK-NEXT: vldr s4, .LCPI28_0
+; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -534,12 +552,14 @@ define float @fp_armv8_vmaxnm_NNNoge(float %a) {
; CHECK-LABEL: fp_armv8_vmaxnm_NNNoge:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr s0, .LCPI29_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vldr s2, .LCPI29_1
-; CHECK-NEXT: vmaxnm.f32 s0, s4, s0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI29_1
; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -564,7 +584,9 @@ define float @fp_armv8_vmaxnm_NNNo_rev(float %a) {
; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s0, s2
-; CHECK-NEXT: vmaxnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -589,7 +611,9 @@ define float @fp_armv8_vmaxnm_NNNole_rev(float %a) {
; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f32 s0, s0, s2
-; CHECK-NEXT: vmaxnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -609,12 +633,14 @@ define float @fp_armv8_vmaxnm_NNNu(float %b) {
; CHECK-LABEL: fp_armv8_vmaxnm_NNNu:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT: vldr s2, .LCPI32_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vmaxnm.f32 s0, s4, s0
-; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vldr s4, .LCPI32_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f32 s0, s0, s2
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -632,12 +658,14 @@ define float @fp_armv8_vmaxnm_NNNuge(float %b) {
; CHECK-LABEL: fp_armv8_vmaxnm_NNNuge:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr s0, .LCPI33_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vldr s2, .LCPI33_1
-; CHECK-NEXT: vmaxnm.f32 s0, s4, s0
-; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI33_1
+; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f32 s0, s0, s2
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -662,7 +690,9 @@ define float @fp_armv8_vmaxnm_NNNu_rev(float %b) {
; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f32 s0, s0, s2
-; CHECK-NEXT: vmaxnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -687,7 +717,9 @@ define double @fp_armv8_vmaxnm_NNNule_rev( double %b) {
; CHECK-NEXT: vcmp.f64 d17, d16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f64 d16, d17, d16
-; CHECK-NEXT: vmaxnm.f64 d16, d16, d18
+; CHECK-NEXT: vcmp.f64 d16, d18
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f64 d16, d16, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
@@ -714,7 +746,9 @@ define float @fp_armv8_vminmaxnm_0(float %a) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vmov.f32 s4, s2
; CHECK-NEXT: vmovlt.f32 s4, s0
-; CHECK-NEXT: vmaxnm.f32 s0, s4, s2
+; CHECK-NEXT: vcmp.f32 s4, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -733,10 +767,12 @@ define float @fp_armv8_vminmaxnm_neg0(float %a) {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr s0, .LCPI37_0
; CHECK-NEXT: vmov s2, r0
-; CHECK-NEXT: vminnm.f32 s2, s2, s0
-; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vselgt.f32 s2, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -758,7 +794,9 @@ define float @fp_armv8_vminmaxnm_e_0(float %a) {
; CHECK-NEXT: vcmp.f32 s0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f32 s0, s2, s0
-; CHECK-NEXT: vmaxnm.f32 s0, s0, s2
+; CHECK-NEXT: vcmp.f32 s0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vmovle.f32 s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -777,7 +815,9 @@ define float @fp_armv8_vminmaxnm_e_neg0(float %a) {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr s0, .LCPI39_0
; CHECK-NEXT: vmov s2, r0
-; CHECK-NEXT: vminnm.f32 s2, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s2, s2, s0
; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f32 s0, s0, s2
diff --git a/llvm/test/CodeGen/SystemZ/vec-max-05.ll b/llvm/test/CodeGen/SystemZ/vec-max-05.ll
index b1b0679306a90..f825ff376e8bf 100644
--- a/llvm/test/CodeGen/SystemZ/vec-max-05.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-max-05.ll
@@ -60,8 +60,11 @@ define double @f3(double %dummy, double %val1, double %val2) {
define double @f4(double %dummy, double %val) {
; CHECK-LABEL: f4:
; CHECK: # %bb.0:
+; CHECK-NEXT: ltdbr %f1, %f2
+; CHECK-NEXT: ldr %f0, %f2
+; CHECK-NEXT: bhr %r14
+; CHECK-NEXT: .LBB3_1:
; CHECK-NEXT: lzdr %f0
-; CHECK-NEXT: wfmaxdb %f0, %f2, %f0, 4
; CHECK-NEXT: br %r14
%cmp = fcmp ogt double %val, 0.0
%ret = select i1 %cmp, double %val, double 0.0
@@ -171,8 +174,11 @@ define float @f13(float %dummy, float %val1, float %val2) {
define float @f14(float %dummy, float %val) {
; CHECK-LABEL: f14:
; CHECK: # %bb.0:
+; CHECK-NEXT: ltebr %f1, %f2
+; CHECK-NEXT: ldr %f0, %f2
+; CHECK-NEXT: bhr %r14
+; CHECK-NEXT: .LBB11_1:
; CHECK-NEXT: lzer %f0
-; CHECK-NEXT: wfmaxsb %f0, %f2, %f0, 4
; CHECK-NEXT: br %r14
%cmp = fcmp ogt float %val, 0.0
%ret = select i1 %cmp, float %val, float 0.0
@@ -280,7 +286,11 @@ define void @f24(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0:
; CHECK-NEXT: vl %v0, 0(%r2), 3
; CHECK-NEXT: vzero %v1
-; CHECK-NEXT: wfmaxxb %v0, %v0, %v1, 4
+; CHECK-NEXT: wfcxb %v0, %v1
+; CHECK-NEXT: jh .LBB18_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: vzero %v0
+; CHECK-NEXT: .LBB18_2:
; CHECK-NEXT: vst %v0, 0(%r3), 3
; CHECK-NEXT: br %r14
%val = load fp128, ptr %ptr
diff --git a/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll b/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll
index 10f9f28e599f8..2cca55e2afc5c 100644
--- a/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll
@@ -8,7 +8,8 @@ define <2 x double> @f1(<2 x double> %val) {
; CHECK-LABEL: f1:
; CHECK: # %bb.0:
; CHECK-NEXT: vgbm %v0, 0
-; CHECK-NEXT: vfmaxdb %v24, %v24, %v0, 4
+; CHECK-NEXT: vfchdb %v0, %v24, %v0
+; CHECK-NEXT: vn %v24, %v0, %v24
; CHECK-NEXT: br %r14
%cmp = fcmp ogt <2 x double> %val, zeroinitializer
%ret = select <2 x i1> %cmp, <2 x double> %val, <2 x double> zeroinitializer
@@ -19,7 +20,8 @@ define <2 x double> @f2(<2 x double> %val) {
; CHECK-LABEL: f2:
; CHECK: # %bb.0:
; CHECK-NEXT: vgbm %v0, 0
-; CHECK-NEXT: vfmindb %v24, %v24, %v0, 4
+; CHECK-NEXT: vfchdb %v0, %v0, %v24
+; CHECK-NEXT: vn %v24, %v0, %v24
; CHECK-NEXT: br %r14
%cmp = fcmp olt <2 x double> %val, zeroinitializer
%ret = select <2 x i1> %cmp, <2 x double> %val, <2 x double> zeroinitializer
@@ -30,7 +32,8 @@ define <4 x float> @f3(<4 x float> %val) {
; CHECK-LABEL: f3:
; CHECK: # %bb.0:
; CHECK-NEXT: vgbm %v0, 0
-; CHECK-NEXT: vfmaxsb %v24, %v24, %v0, 4
+; CHECK-NEXT: vfchsb %v0, %v24, %v0
+; CHECK-NEXT: vn %v24, %v0, %v24
; CHECK-NEXT: br %r14
%cmp = fcmp ogt <4 x float> %val, zeroinitializer
%ret = select <4 x i1> %cmp, <4 x float> %val, <4 x float> zeroinitializer
@@ -41,7 +44,8 @@ define <4 x float> @f4(<4 x float> %val) {
; CHECK-LABEL: f4:
; CHECK: # %bb.0:
; CHECK-NEXT: vgbm %v0, 0
-; CHECK-NEXT: vfminsb %v24, %v24, %v0, 4
+; CHECK-NEXT: vfchsb %v0, %v0, %v24
+; CHECK-NEXT: vn %v24, %v0, %v24
; CHECK-NEXT: br %r14
%cmp = fcmp olt <4 x float> %val, zeroinitializer
%ret = select <4 x i1> %cmp, <4 x float> %val, <4 x float> zeroinitializer
diff --git a/llvm/test/CodeGen/SystemZ/vec-min-05.ll b/llvm/test/CodeGen/SystemZ/vec-min-05.ll
index dca5fcd60e0ce..2f4d0cdbb9b87 100644
--- a/llvm/test/CodeGen/SystemZ/vec-min-05.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-min-05.ll
@@ -60,8 +60,11 @@ define double @f3(double %dummy, double %val1, double %val2) {
define double @f4(double %dummy, double %val) {
; CHECK-LABEL: f4:
; CHECK: # %bb.0:
+; CHECK-NEXT: ltdbr %f1, %f2
+; CHECK-NEXT: ldr %f0, %f2
+; CHECK-NEXT: blr %r14
+; CHECK-NEXT: .LBB3_1:
; CHECK-NEXT: lzdr %f0
-; CHECK-NEXT: wfmindb %f0, %f2, %f0, 4
; CHECK-NEXT: br %r14
%cmp = fcmp olt double %val, 0.0
%ret = select i1 %cmp, double %val, double 0.0
@@ -171,8 +174,11 @@ define float @f13(float %dummy, float %val1, float %val2) {
define float @f14(float %dummy, float %val) {
; CHECK-LABEL: f14:
; CHECK: # %bb.0:
+; CHECK-NEXT: ltebr %f1, %f2
+; CHECK-NEXT: ldr %f0, %f2
+; CHECK-NEXT: blr %r14
+; CHECK-NEXT: .LBB11_1:
; CHECK-NEXT: lzer %f0
-; CHECK-NEXT: wfminsb %f0, %f2, %f0, 4
; CHECK-NEXT: br %r14
%cmp = fcmp olt float %val, 0.0
%ret = select i1 %cmp, float %val, float 0.0
@@ -280,7 +286,11 @@ define void @f24(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0:
; CHECK-NEXT: vl %v0, 0(%r2), 3
; CHECK-NEXT: vzero %v1
-; CHECK-NEXT: wfminxb %v0, %v0, %v1, 4
+; CHECK-NEXT: wfcxb %v0, %v1
+; CHECK-NEXT: jl .LBB18_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: vzero %v0
+; CHECK-NEXT: .LBB18_2:
; CHECK-NEXT: vst %v0, 0(%r3), 3
; CHECK-NEXT: br %r14
%val = load fp128, ptr %ptr
diff --git a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
index 2f86499a2df9e..4f1ba8d35e5f1 100644
--- a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
+++ b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
@@ -1974,8 +1974,9 @@ define void @bcast_unfold_fmax_v4f32(ptr %arg) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB60_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vcmpnltps 4096(%rdi,%rax), %xmm0, %k1
-; CHECK-NEXT: vmovups %xmm0, 4096(%rdi,%rax) {%k1}
+; CHECK-NEXT: vmovups 4096(%rdi,%rax), %xmm1
+; CHECK-NEXT: vmaxps %xmm0, %xmm1, %xmm1
+; CHECK-NEXT: vmovups %xmm1, 4096(%rdi,%rax)
; CHECK-NEXT: addq $16, %rax
; CHECK-NEXT: jne .LBB60_1
; CHECK-NEXT: # %bb.2: # %bb10
@@ -2006,8 +2007,9 @@ define void @bcast_unfold_fmax_v8f32(ptr %arg) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB61_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vcmpnltps 4096(%rdi,%rax), %ymm0, %k1
-; CHECK-NEXT: vmovups %ymm0, 4096(%rdi,%rax) {%k1}
+; CHECK-NEXT: vmovups 4096(%rdi,%rax), %ymm1
+; CHECK-NEXT: vmaxps %ymm0, %ymm1, %ymm1
+; CHECK-NEXT: vmovups %ymm1, 4096(%rdi,%rax)
; CHECK-NEXT: addq $32, %rax
; CHECK-NEXT: jne .LBB61_1
; CHECK-NEXT: # %bb.2: # %bb10
@@ -2039,8 +2041,9 @@ define void @bcast_unfold_fmax_v16f32(ptr %arg) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB62_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vcmpnltps 4096(%rdi,%rax), %zmm0, %k1
-; CHECK-NEXT: vmovups %zmm0, 4096(%rdi,%rax) {%k1}
+; CHECK-NEXT: vmovups 4096(%rdi,%rax), %zmm1
+; CHECK-NEXT: vmaxps %zmm0, %zmm1, %zmm1
+; CHECK-NEXT: vmovups %zmm1, 4096(%rdi,%rax)
; CHECK-NEXT: addq $64, %rax
; CHECK-NEXT: jne .LBB62_1
; CHECK-NEXT: # %bb.2: # %bb10
@@ -2073,8 +2076,9 @@ define void @bcast_unfold_fmax_v2f64(ptr %arg) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB63_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vcmpnltpd 8192(%rdi,%rax), %xmm0, %k1
-; CHECK-NEXT: vmovupd %xmm0, 8192(%rdi,%rax) {%k1}
+; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %xmm1
+; CHECK-NEXT: vmaxpd %xmm0, %xmm1, %xmm1
+; CHECK-NEXT: vmovupd %xmm1, 8192(%rdi,%rax)
; CHECK-NEXT: addq $16, %rax
; CHECK-NEXT: jne .LBB63_1
; CHECK-NEXT: # %bb.2: # %bb10
@@ -2105,8 +2109,9 @@ define void @bcast_unfold_fmax_v4f64(ptr %arg) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB64_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vcmpnltpd 8192(%rdi,%rax), %ymm0, %k1
-; CHECK-NEXT: vmovupd %ymm0, 8192(%rdi,%rax) {%k1}
+; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %ymm1
+; CHECK-NEXT: vmaxpd %ymm0, %ymm1, %ymm1
+; CHECK-NEXT: vmovupd %ymm1, 8192(%rdi,%rax)
; CHECK-NEXT: addq $32, %rax
; CHECK-NEXT: jne .LBB64_1
; CHECK-NEXT: # %bb.2: # %bb10
@@ -2138,8 +2143,9 @@ define void @bcast_unfold_fmax_v8f64(ptr %arg) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB65_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vcmpnltpd 8192(%rdi,%rax), %zmm0, %k1
-; CHECK-NEXT: vmovupd %zmm0, 8192(%rdi,%rax) {%k1}
+; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %zmm1
+; CHECK-NEXT: vmaxpd %zmm0, %zmm1, %zmm1
+; CHECK-NEXT: vmovupd %zmm1, 8192(%rdi,%rax)
; CHECK-NEXT: addq $64, %rax
; CHECK-NEXT: jne .LBB65_1
; CHECK-NEXT: # %bb.2: # %bb10
@@ -2171,8 +2177,9 @@ define void @bcast_unfold_fmin_v4f32(ptr %arg) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB66_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vcmpngtps 4096(%rdi,%rax), %xmm0, %k1
-; CHECK-NEXT: vmovups %xmm0, 4096(%rdi,%rax) {%k1}
+; CHECK-NEXT: vmovups 4096(%rdi,%rax), %xmm1
+; CHECK-NEXT: vminps %xmm0, %xmm1, %xmm1
+; CHECK-NEXT: vmovups %xmm1, 4096(%rdi,%rax)
; CHECK-NEXT: addq $16, %rax
; CHECK-NEXT: jne .LBB66_1
; CHECK-NEXT: # %bb.2: # %bb10
@@ -2203,8 +2210,9 @@ define void @bcast_unfold_fmin_v8f32(ptr %arg) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB67_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vcmpngtps 4096(%rdi,%rax), %ymm0, %k1
-; CHECK-NEXT: vmovups %ymm0, 4096(%rdi,%rax) {%k1}
+; CHECK-NEXT: vmovups 4096(%rdi,%rax), %ymm1
+; CHECK-NEXT: vminps %ymm0, %ymm1, %ymm1
+; CHECK-NEXT: vmovups %ymm1, 4096(%rdi,%rax)
; CHECK-NEXT: addq $32, %rax
; CHECK-NEXT: jne .LBB67_1
; CHECK-NEXT: # %bb.2: # %bb10
@@ -2236,8 +2244,9 @@ define void @bcast_unfold_fmin_v16f32(ptr %arg) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB68_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vcmpngtps 4096(%rdi,%rax), %zmm0, %k1
-; CHECK-NEXT: vmovups %zmm0, 4096(%rdi,%rax) {%k1}
+; CHECK-NEXT: vmovups 4096(%rdi,%rax), %zmm1
+; CHECK-NEXT: vminps %zmm0, %zmm1, %zmm1
+; CHECK-NEXT: vmovups %zmm1, 4096(%rdi,%rax)
; CHECK-NEXT: addq $64, %rax
; CHECK-NEXT: jne .LBB68_1
; CHECK-NEXT: # %bb.2: # %bb10
@@ -2270,8 +2279,9 @@ define void @bcast_unfold_fmin_v2f64(ptr %arg) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB69_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vcmpngtpd 8192(%rdi,%rax), %xmm0, %k1
-; CHECK-NEXT: vmovupd %xmm0, 8192(%rdi,%rax) {%k1}
+; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %xmm1
+; CHECK-NEXT: vminpd %xmm0, %xmm1, %xmm1
+; CHECK-NEXT: vmovupd %xmm1, 8192(%rdi,%rax)
; CHECK-NEXT: addq $16, %rax
; CHECK-NEXT: jne .LBB69_1
; CHECK-NEXT: # %bb.2: # %bb10
@@ -2302,8 +2312,9 @@ define void @bcast_unfold_fmin_v4f64(ptr %arg) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB70_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vcmpngtpd 8192(%rdi,%rax), %ymm0, %k1
-; CHECK-NEXT: vmovupd %ymm0, 8192(%rdi,%rax) {%k1}
+; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %ymm1
+; CHECK-NEXT: vminpd %ymm0, %ymm1, %ymm1
+; CHECK-NEXT: vmovupd %ymm1, 8192(%rdi,%rax)
; CHECK-NEXT: addq $32, %rax
; CHECK-NEXT: jne .LBB70_1
; CHECK-NEXT: # %bb.2: # %bb10
@@ -2335,8 +2346,9 @@ define void @bcast_unfold_fmin_v8f64(ptr %arg) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB71_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vcmpngtpd 8192(%rdi,%rax), %zmm0, %k1
-; CHECK-NEXT: vmovupd %zmm0, 8192(%rdi,%rax) {%k1}
+; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %zmm1
+; CHECK-NEXT: vminpd %zmm0, %zmm1, %zmm1
+; CHECK-NEXT: vmovupd %zmm1, 8192(%rdi,%rax)
; CHECK-NEXT: addq $64, %rax
; CHECK-NEXT: jne .LBB71_1
; CHECK-NEXT: # %bb.2: # %bb10
diff --git a/llvm/test/CodeGen/X86/sse-minmax.ll b/llvm/test/CodeGen/X86/sse-minmax.ll
index 2b97f98450973..c1fb1c6b87a23 100644
--- a/llvm/test/CodeGen/X86/sse-minmax.ll
+++ b/llvm/test/CodeGen/X86/sse-minmax.ll
@@ -365,11 +365,8 @@ define double @ult_x(double %x) {
define double @ugt_inverse_x(double %x) {
; CHECK-LABEL: ugt_inverse_x:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorpd %xmm2, %xmm2
-; CHECK-NEXT: movapd %xmm0, %xmm1
-; CHECK-NEXT: cmpnlesd %xmm2, %xmm1
-; CHECK-NEXT: andnpd %xmm0, %xmm1
-; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
; CHECK-NEXT: retq
%c = fcmp ugt double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
@@ -380,9 +377,7 @@ define double @ult_inverse_x(double %x) {
; CHECK-LABEL: ult_inverse_x:
; CHECK: # %bb.0:
; CHECK-NEXT: xorpd %xmm1, %xmm1
-; CHECK-NEXT: cmpnlesd %xmm0, %xmm1
-; CHECK-NEXT: andnpd %xmm0, %xmm1
-; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: maxsd %xmm1, %xmm0
; CHECK-NEXT: retq
%c = fcmp ult double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
@@ -583,10 +578,7 @@ define double @ult_y(double %x) {
define double @ugt_inverse_y(double %x) {
; CHECK-LABEL: ugt_inverse_y:
; CHECK: # %bb.0:
-; CHECK-NEXT: movapd %xmm0, %xmm1
-; CHECK-NEXT: cmpnlesd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: blendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
%c = fcmp ugt double %x, -0.000000e+00
%d = select i1 %c, double -0.000000e+00, double %x
@@ -596,11 +588,7 @@ define double @ugt_inverse_y(double %x) {
define double @ult_inverse_y(double %x) {
; CHECK-LABEL: ult_inverse_y:
; CHECK: # %bb.0:
-; CHECK-NEXT: movapd %xmm0, %xmm1
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0]
-; CHECK-NEXT: cmpnlesd %xmm1, %xmm0
-; CHECK-NEXT: blendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
%c = fcmp ult double %x, -0.000000e+00
%d = select i1 %c, double -0.000000e+00, double %x
>From 3360021b95dbcdc24bba2efc9998a6add3fca48f Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Thu, 5 Mar 2026 09:14:38 +0100
Subject: [PATCH 2/2] Restore SystemZ tests after rebase
---
llvm/test/CodeGen/SystemZ/vec-max-05.ll | 16 +++-------------
.../CodeGen/SystemZ/vec-max-min-zerosplat.ll | 12 ++++--------
llvm/test/CodeGen/SystemZ/vec-min-05.ll | 16 +++-------------
3 files changed, 10 insertions(+), 34 deletions(-)
diff --git a/llvm/test/CodeGen/SystemZ/vec-max-05.ll b/llvm/test/CodeGen/SystemZ/vec-max-05.ll
index f825ff376e8bf..b1b0679306a90 100644
--- a/llvm/test/CodeGen/SystemZ/vec-max-05.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-max-05.ll
@@ -60,11 +60,8 @@ define double @f3(double %dummy, double %val1, double %val2) {
define double @f4(double %dummy, double %val) {
; CHECK-LABEL: f4:
; CHECK: # %bb.0:
-; CHECK-NEXT: ltdbr %f1, %f2
-; CHECK-NEXT: ldr %f0, %f2
-; CHECK-NEXT: bhr %r14
-; CHECK-NEXT: .LBB3_1:
; CHECK-NEXT: lzdr %f0
+; CHECK-NEXT: wfmaxdb %f0, %f2, %f0, 4
; CHECK-NEXT: br %r14
%cmp = fcmp ogt double %val, 0.0
%ret = select i1 %cmp, double %val, double 0.0
@@ -174,11 +171,8 @@ define float @f13(float %dummy, float %val1, float %val2) {
define float @f14(float %dummy, float %val) {
; CHECK-LABEL: f14:
; CHECK: # %bb.0:
-; CHECK-NEXT: ltebr %f1, %f2
-; CHECK-NEXT: ldr %f0, %f2
-; CHECK-NEXT: bhr %r14
-; CHECK-NEXT: .LBB11_1:
; CHECK-NEXT: lzer %f0
+; CHECK-NEXT: wfmaxsb %f0, %f2, %f0, 4
; CHECK-NEXT: br %r14
%cmp = fcmp ogt float %val, 0.0
%ret = select i1 %cmp, float %val, float 0.0
@@ -286,11 +280,7 @@ define void @f24(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0:
; CHECK-NEXT: vl %v0, 0(%r2), 3
; CHECK-NEXT: vzero %v1
-; CHECK-NEXT: wfcxb %v0, %v1
-; CHECK-NEXT: jh .LBB18_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: vzero %v0
-; CHECK-NEXT: .LBB18_2:
+; CHECK-NEXT: wfmaxxb %v0, %v0, %v1, 4
; CHECK-NEXT: vst %v0, 0(%r3), 3
; CHECK-NEXT: br %r14
%val = load fp128, ptr %ptr
diff --git a/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll b/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll
index 2cca55e2afc5c..10f9f28e599f8 100644
--- a/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll
@@ -8,8 +8,7 @@ define <2 x double> @f1(<2 x double> %val) {
; CHECK-LABEL: f1:
; CHECK: # %bb.0:
; CHECK-NEXT: vgbm %v0, 0
-; CHECK-NEXT: vfchdb %v0, %v24, %v0
-; CHECK-NEXT: vn %v24, %v0, %v24
+; CHECK-NEXT: vfmaxdb %v24, %v24, %v0, 4
; CHECK-NEXT: br %r14
%cmp = fcmp ogt <2 x double> %val, zeroinitializer
%ret = select <2 x i1> %cmp, <2 x double> %val, <2 x double> zeroinitializer
@@ -20,8 +19,7 @@ define <2 x double> @f2(<2 x double> %val) {
; CHECK-LABEL: f2:
; CHECK: # %bb.0:
; CHECK-NEXT: vgbm %v0, 0
-; CHECK-NEXT: vfchdb %v0, %v0, %v24
-; CHECK-NEXT: vn %v24, %v0, %v24
+; CHECK-NEXT: vfmindb %v24, %v24, %v0, 4
; CHECK-NEXT: br %r14
%cmp = fcmp olt <2 x double> %val, zeroinitializer
%ret = select <2 x i1> %cmp, <2 x double> %val, <2 x double> zeroinitializer
@@ -32,8 +30,7 @@ define <4 x float> @f3(<4 x float> %val) {
; CHECK-LABEL: f3:
; CHECK: # %bb.0:
; CHECK-NEXT: vgbm %v0, 0
-; CHECK-NEXT: vfchsb %v0, %v24, %v0
-; CHECK-NEXT: vn %v24, %v0, %v24
+; CHECK-NEXT: vfmaxsb %v24, %v24, %v0, 4
; CHECK-NEXT: br %r14
%cmp = fcmp ogt <4 x float> %val, zeroinitializer
%ret = select <4 x i1> %cmp, <4 x float> %val, <4 x float> zeroinitializer
@@ -44,8 +41,7 @@ define <4 x float> @f4(<4 x float> %val) {
; CHECK-LABEL: f4:
; CHECK: # %bb.0:
; CHECK-NEXT: vgbm %v0, 0
-; CHECK-NEXT: vfchsb %v0, %v0, %v24
-; CHECK-NEXT: vn %v24, %v0, %v24
+; CHECK-NEXT: vfminsb %v24, %v24, %v0, 4
; CHECK-NEXT: br %r14
%cmp = fcmp olt <4 x float> %val, zeroinitializer
%ret = select <4 x i1> %cmp, <4 x float> %val, <4 x float> zeroinitializer
diff --git a/llvm/test/CodeGen/SystemZ/vec-min-05.ll b/llvm/test/CodeGen/SystemZ/vec-min-05.ll
index 2f4d0cdbb9b87..dca5fcd60e0ce 100644
--- a/llvm/test/CodeGen/SystemZ/vec-min-05.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-min-05.ll
@@ -60,11 +60,8 @@ define double @f3(double %dummy, double %val1, double %val2) {
define double @f4(double %dummy, double %val) {
; CHECK-LABEL: f4:
; CHECK: # %bb.0:
-; CHECK-NEXT: ltdbr %f1, %f2
-; CHECK-NEXT: ldr %f0, %f2
-; CHECK-NEXT: blr %r14
-; CHECK-NEXT: .LBB3_1:
; CHECK-NEXT: lzdr %f0
+; CHECK-NEXT: wfmindb %f0, %f2, %f0, 4
; CHECK-NEXT: br %r14
%cmp = fcmp olt double %val, 0.0
%ret = select i1 %cmp, double %val, double 0.0
@@ -174,11 +171,8 @@ define float @f13(float %dummy, float %val1, float %val2) {
define float @f14(float %dummy, float %val) {
; CHECK-LABEL: f14:
; CHECK: # %bb.0:
-; CHECK-NEXT: ltebr %f1, %f2
-; CHECK-NEXT: ldr %f0, %f2
-; CHECK-NEXT: blr %r14
-; CHECK-NEXT: .LBB11_1:
; CHECK-NEXT: lzer %f0
+; CHECK-NEXT: wfminsb %f0, %f2, %f0, 4
; CHECK-NEXT: br %r14
%cmp = fcmp olt float %val, 0.0
%ret = select i1 %cmp, float %val, float 0.0
@@ -286,11 +280,7 @@ define void @f24(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0:
; CHECK-NEXT: vl %v0, 0(%r2), 3
; CHECK-NEXT: vzero %v1
-; CHECK-NEXT: wfcxb %v0, %v1
-; CHECK-NEXT: jl .LBB18_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: vzero %v0
-; CHECK-NEXT: .LBB18_2:
+; CHECK-NEXT: wfminxb %v0, %v0, %v1, 4
; CHECK-NEXT: vst %v0, 0(%r3), 3
; CHECK-NEXT: br %r14
%val = load fp128, ptr %ptr
More information about the llvm-commits
mailing list