[llvm] 0418700 - [SDAGBuilder] Fix incorrect fcmp+select to minnum/maxnum transform (#184590)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 5 00:47:08 PST 2026
Author: Nikita Popov
Date: 2026-03-05T08:47:01Z
New Revision: 0418700f047950e71b5c94a31f2ed76f976e6090
URL: https://github.com/llvm/llvm-project/commit/0418700f047950e71b5c94a31f2ed76f976e6090
DIFF: https://github.com/llvm/llvm-project/commit/0418700f047950e71b5c94a31f2ed76f976e6090.diff
LOG: [SDAGBuilder] Fix incorrect fcmp+select to minnum/maxnum transform (#184590)
minnum/maxnum don't have the correct sNaN semantics, we must convert to
minimumnum/maximumnum instead.
To avoid an NVPTX regression, make it handle fmaximumnum in one
TableGen pattern.
This is intended as a targeted fix for the miscompile, as the complete
removal of this transform (#93575) appears to be blocked.
Fixes https://github.com/llvm/llvm-project/issues/176624.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
llvm/test/CodeGen/X86/sse-minmax.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 3e6fdd7bbf9fe..6ed4cd04d06f1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3878,7 +3878,10 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
case SPNB_RETURNS_NAN: break;
- case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
+ case SPNB_RETURNS_OTHER:
+ Opc = ISD::FMINIMUMNUM;
+ Flags.setNoSignedZeros(true);
+ break;
case SPNB_RETURNS_ANY:
if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ||
(UseScalarMinMax &&
@@ -3891,7 +3894,10 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
case SPNB_RETURNS_NAN: break;
- case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
+ case SPNB_RETURNS_OTHER:
+ Opc = ISD::FMAXIMUMNUM;
+ Flags.setNoSignedZeros(true);
+ break;
case SPNB_RETURNS_ANY:
if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) ||
(UseScalarMinMax &&
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 096c5e470ed02..5019d76039a4d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2553,10 +2553,10 @@ def NVPTX_fma_oneuse_and_nnan : PatFrag<(ops node:$a, node:$b, node:$c),
return N->hasOneUse() &&
(N->getFlags().hasNoNaNs() || TM.Options.NoNaNsFPMath);
}]>;
-// fmaxnum will differentiate between signed and unsigned zeros soon, so this
-// PatFrag is for a fmaxnum node with nsz
-def NVPTX_fmaxnum_nsz : PatFrag<(ops node:$a, node:$b),
- (fmaxnum node:$a, node:$b), [{
+// fmaxnum/fmaximumnum differentiate between signed and unsigned zeros, so this
+// PatFrag is for a fmaxnum/fmaximumnum node with nsz
+def NVPTX_fmaxnum_or_fmaximumnum_nsz : PatFrag<(ops node:$a, node:$b),
+ (fmaxnum_or_fmaximumnum node:$a, node:$b), [{
return N->getFlags().hasNoSignedZeros();
}]>;
@@ -2564,7 +2564,7 @@ class FMARELUInst<RegTyInfo t, bit allow_ftz, PatFrag zero_pat>
: BasicFlagsNVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b, t.RC:$c),
!if(allow_ftz, (ins FTZFlag:$ftz), (ins)),
"fma.rn" # !if(allow_ftz, "$ftz", "") # ".relu." # t.PtxType,
- [(set t.Ty:$dst, (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan t.Ty:$a, t.Ty:$b, t.Ty:$c), zero_pat))]>;
+ [(set t.Ty:$dst, (NVPTX_fmaxnum_or_fmaximumnum_nsz (NVPTX_fma_oneuse_and_nnan t.Ty:$a, t.Ty:$b, t.Ty:$c), zero_pat))]>;
let Predicates = [useFP16Math, hasPTX<70>, hasSM<80>] in {
def FMARELU_F16 : FMARELUInst<F16RT, true, fpimm_0>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
index 159075db0f7bc..4e3fac3650e98 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
@@ -31,12 +31,14 @@ define double @test_cross(float %in) {
}
; Same as previous, but with ordered comparison;
-; must become fminnm, not fmin.
+; Should not become either fminnm or fmin, because neither have the correct
+; behavior for sNaN.
define double @test_cross_fail_nan(float %in) {
; CHECK-LABEL: test_cross_fail_nan:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0000000000000000
-; CHECK-NEXT: fminnm s0, s0, s1
+; CHECK-NEXT: fcmp s0, #0.0
+; CHECK-NEXT: fcsel s0, s0, s1, mi
; CHECK-NEXT: fcvt d0, s0
; CHECK-NEXT: ret
%cmp = fcmp olt float %in, 0.000000e+00
diff --git a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
index 2185bd8a2a138..9e4685f2081df 100644
--- a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
+++ b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
@@ -201,7 +201,9 @@ define half @fp16_vminnm_NNNo(half %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI12_0
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -229,7 +231,9 @@ define half @fp16_vminnm_NNNo_rev(half %a) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI13_1
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -251,11 +255,13 @@ define half @fp16_vminnm_NNNu(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
-; CHECK-NEXT: vldr.16 s2, .LCPI14_0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI14_0
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -275,11 +281,13 @@ define half @fp16_vminnm_NNNule(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI15_0
; CHECK-NEXT: vmov.f16 s0, r0
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
-; CHECK-NEXT: vldr.16 s2, .LCPI15_1
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI15_1
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -306,7 +314,9 @@ define half @fp16_vminnm_NNNu_rev(half %b) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI16_1
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -330,7 +340,9 @@ define half @fp16_vmaxnm_NNNo(half %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI17_0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -354,7 +366,9 @@ define half @fp16_vmaxnm_NNNoge(half %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI18_0
; CHECK-NEXT: vmov.f16 s0, r0
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI18_1
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -384,7 +398,9 @@ define half @fp16_vmaxnm_NNNo_rev(half %a) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI19_1
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -410,7 +426,9 @@ define half @fp16_vmaxnm_NNNole_rev(half %a) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI20_1
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -432,11 +450,13 @@ define half @fp16_vmaxnm_NNNu(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
-; CHECK-NEXT: vldr.16 s2, .LCPI21_0
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI21_0
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -456,11 +476,13 @@ define half @fp16_vmaxnm_NNNuge(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI22_0
; CHECK-NEXT: vmov.f16 s0, r0
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
-; CHECK-NEXT: vldr.16 s2, .LCPI22_1
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI22_1
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -480,12 +502,14 @@ entry:
define half @fp16_vminmaxnm_neg0(half %a) {
; CHECK-LABEL: fp16_vminmaxnm_neg0:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldr.16 s0, .LCPI23_0
-; CHECK-NEXT: vmov.f16 s2, r0
-; CHECK-NEXT: vminnm.f16 s2, s2, s0
+; CHECK-NEXT: vldr.16 s2, .LCPI23_0
+; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f16 s0, s2, s0
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -508,7 +532,9 @@ define half @fp16_vminmaxnm_e_0(half %a) {
; CHECK-NEXT: vcmp.f16 s0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -526,12 +552,14 @@ entry:
define half @fp16_vminmaxnm_e_neg0(half %a) {
; CHECK-LABEL: fp16_vminmaxnm_e_neg0:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldr.16 s0, .LCPI25_0
-; CHECK-NEXT: vmov.f16 s2, r0
-; CHECK-NEXT: vminnm.f16 s2, s2, s0
-; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI25_0
+; CHECK-NEXT: vmov.f16 s0, r0
+; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
diff --git a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
index 8564d7d9996d3..01cb3ba404816 100644
--- a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
@@ -1300,9 +1300,9 @@ define void @pr65820(ptr %y, <4 x float> %splat) {
; ARMV8-LABEL: pr65820:
; ARMV8: @ %bb.0: @ %entry
; ARMV8-NEXT: vmov d16, r2, r3
-; ARMV8-NEXT: vmov.i32 q9, #0x0
; ARMV8-NEXT: vdup.32 q8, d16[0]
-; ARMV8-NEXT: vmaxnm.f32 q8, q8, q9
+; ARMV8-NEXT: vcgt.f32 q9, q8, #0
+; ARMV8-NEXT: vand q8, q9, q8
; ARMV8-NEXT: vst1.32 {d16, d17}, [r0]
; ARMV8-NEXT: bx lr
;
@@ -1312,7 +1312,8 @@ define void @pr65820(ptr %y, <4 x float> %splat) {
; ARMV8M-NEXT: vmov r1, s0
; ARMV8M-NEXT: vmov.i32 q0, #0x0
; ARMV8M-NEXT: vdup.32 q1, r1
-; ARMV8M-NEXT: vmaxnm.f32 q0, q1, q0
+; ARMV8M-NEXT: vcmp.f32 gt, q1, zr
+; ARMV8M-NEXT: vdupt.32 q0, r1
; ARMV8M-NEXT: vstrw.32 q0, [r0]
; ARMV8M-NEXT: bx lr
entry:
diff --git a/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll b/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
index 9d0fef6452a38..04c11c35f99e5 100644
--- a/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
+++ b/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
@@ -309,12 +309,14 @@ define float @fp_armv8_vminnm_NNNo(float %a) {
; CHECK-LABEL: fp_armv8_vminnm_NNNo:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT: vldr s2, .LCPI20_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vminnm.f32 s0, s4, s0
+; CHECK-NEXT: vldr s4, .LCPI20_0
+; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -331,13 +333,15 @@ define float @fp_armv8_vminnm_NNNo(float %a) {
define double @fp_armv8_vminnm_NNNole(double %a) {
; CHECK-LABEL: fp_armv8_vminnm_NNNole:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vldr d16, .LCPI21_0
-; CHECK-NEXT: vmov d18, r0, r1
-; CHECK-NEXT: vldr d17, .LCPI21_1
-; CHECK-NEXT: vminnm.f64 d16, d18, d16
-; CHECK-NEXT: vcmp.f64 d16, d17
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vldr d17, .LCPI21_0
+; CHECK-NEXT: vldr d18, .LCPI21_1
+; CHECK-NEXT: vcmp.f64 d17, d16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f64 d16, d17, d16
+; CHECK-NEXT: vselge.f64 d16, d16, d17
+; CHECK-NEXT: vcmp.f64 d16, d18
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d18, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
@@ -364,7 +368,9 @@ define float @fp_armv8_vminnm_NNNo_rev(float %a) {
; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s0, s2
-; CHECK-NEXT: vminnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -389,7 +395,9 @@ define double @fp_armv8_vminnm_NNNoge_rev(double %a) {
; CHECK-NEXT: vcmp.f64 d17, d16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f64 d16, d16, d17
-; CHECK-NEXT: vminnm.f64 d16, d16, d18
+; CHECK-NEXT: vcmp.f64 d18, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d16, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
@@ -411,12 +419,14 @@ define float @fp_armv8_vminnm_NNNu(float %b) {
; CHECK-LABEL: fp_armv8_vminnm_NNNu:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT: vldr s2, .LCPI24_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vminnm.f32 s0, s4, s0
-; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vldr s4, .LCPI24_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f32 s0, s0, s2
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -434,12 +444,14 @@ define float @fp_armv8_vminnm_NNNule(float %b) {
; CHECK-LABEL: fp_armv8_vminnm_NNNule:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr s0, .LCPI25_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vldr s2, .LCPI25_1
-; CHECK-NEXT: vminnm.f32 s0, s4, s0
-; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI25_1
+; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f32 s0, s0, s2
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -464,7 +476,9 @@ define float @fp_armv8_vminnm_NNNu_rev(float %b) {
; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f32 s0, s0, s2
-; CHECK-NEXT: vminnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -489,7 +503,9 @@ define double @fp_armv8_vminnm_NNNuge_rev(double %b) {
; CHECK-NEXT: vcmp.f64 d17, d16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f64 d16, d16, d17
-; CHECK-NEXT: vminnm.f64 d16, d16, d18
+; CHECK-NEXT: vcmp.f64 d18, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f64 d16, d16, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
@@ -511,12 +527,14 @@ define float @fp_armv8_vmaxnm_NNNo(float %a) {
; CHECK-LABEL: fp_armv8_vmaxnm_NNNo:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT: vldr s2, .LCPI28_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vmaxnm.f32 s0, s4, s0
+; CHECK-NEXT: vldr s4, .LCPI28_0
+; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -534,12 +552,14 @@ define float @fp_armv8_vmaxnm_NNNoge(float %a) {
; CHECK-LABEL: fp_armv8_vmaxnm_NNNoge:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr s0, .LCPI29_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vldr s2, .LCPI29_1
-; CHECK-NEXT: vmaxnm.f32 s0, s4, s0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI29_1
; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -564,7 +584,9 @@ define float @fp_armv8_vmaxnm_NNNo_rev(float %a) {
; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s0, s2
-; CHECK-NEXT: vmaxnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -589,7 +611,9 @@ define float @fp_armv8_vmaxnm_NNNole_rev(float %a) {
; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f32 s0, s0, s2
-; CHECK-NEXT: vmaxnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -609,12 +633,14 @@ define float @fp_armv8_vmaxnm_NNNu(float %b) {
; CHECK-LABEL: fp_armv8_vmaxnm_NNNu:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT: vldr s2, .LCPI32_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vmaxnm.f32 s0, s4, s0
-; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vldr s4, .LCPI32_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f32 s0, s0, s2
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -632,12 +658,14 @@ define float @fp_armv8_vmaxnm_NNNuge(float %b) {
; CHECK-LABEL: fp_armv8_vmaxnm_NNNuge:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr s0, .LCPI33_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vldr s2, .LCPI33_1
-; CHECK-NEXT: vmaxnm.f32 s0, s4, s0
-; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI33_1
+; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f32 s0, s0, s2
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -662,7 +690,9 @@ define float @fp_armv8_vmaxnm_NNNu_rev(float %b) {
; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f32 s0, s0, s2
-; CHECK-NEXT: vmaxnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -687,7 +717,9 @@ define double @fp_armv8_vmaxnm_NNNule_rev( double %b) {
; CHECK-NEXT: vcmp.f64 d17, d16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f64 d16, d17, d16
-; CHECK-NEXT: vmaxnm.f64 d16, d16, d18
+; CHECK-NEXT: vcmp.f64 d16, d18
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f64 d16, d16, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
@@ -714,7 +746,9 @@ define float @fp_armv8_vminmaxnm_0(float %a) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vmov.f32 s4, s2
; CHECK-NEXT: vmovlt.f32 s4, s0
-; CHECK-NEXT: vmaxnm.f32 s0, s4, s2
+; CHECK-NEXT: vcmp.f32 s4, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -733,10 +767,12 @@ define float @fp_armv8_vminmaxnm_neg0(float %a) {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr s0, .LCPI37_0
; CHECK-NEXT: vmov s2, r0
-; CHECK-NEXT: vminnm.f32 s2, s2, s0
-; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vselgt.f32 s2, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -758,7 +794,9 @@ define float @fp_armv8_vminmaxnm_e_0(float %a) {
; CHECK-NEXT: vcmp.f32 s0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f32 s0, s2, s0
-; CHECK-NEXT: vmaxnm.f32 s0, s0, s2
+; CHECK-NEXT: vcmp.f32 s0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vmovle.f32 s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -777,7 +815,9 @@ define float @fp_armv8_vminmaxnm_e_neg0(float %a) {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr s0, .LCPI39_0
; CHECK-NEXT: vmov s2, r0
-; CHECK-NEXT: vminnm.f32 s2, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s2, s2, s0
; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f32 s0, s0, s2
diff --git a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
index 2f86499a2df9e..4f1ba8d35e5f1 100644
--- a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
+++ b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
@@ -1974,8 +1974,9 @@ define void @bcast_unfold_fmax_v4f32(ptr %arg) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB60_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vcmpnltps 4096(%rdi,%rax), %xmm0, %k1
-; CHECK-NEXT: vmovups %xmm0, 4096(%rdi,%rax) {%k1}
+; CHECK-NEXT: vmovups 4096(%rdi,%rax), %xmm1
+; CHECK-NEXT: vmaxps %xmm0, %xmm1, %xmm1
+; CHECK-NEXT: vmovups %xmm1, 4096(%rdi,%rax)
; CHECK-NEXT: addq $16, %rax
; CHECK-NEXT: jne .LBB60_1
; CHECK-NEXT: # %bb.2: # %bb10
@@ -2006,8 +2007,9 @@ define void @bcast_unfold_fmax_v8f32(ptr %arg) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB61_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vcmpnltps 4096(%rdi,%rax), %ymm0, %k1
-; CHECK-NEXT: vmovups %ymm0, 4096(%rdi,%rax) {%k1}
+; CHECK-NEXT: vmovups 4096(%rdi,%rax), %ymm1
+; CHECK-NEXT: vmaxps %ymm0, %ymm1, %ymm1
+; CHECK-NEXT: vmovups %ymm1, 4096(%rdi,%rax)
; CHECK-NEXT: addq $32, %rax
; CHECK-NEXT: jne .LBB61_1
; CHECK-NEXT: # %bb.2: # %bb10
@@ -2039,8 +2041,9 @@ define void @bcast_unfold_fmax_v16f32(ptr %arg) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB62_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vcmpnltps 4096(%rdi,%rax), %zmm0, %k1
-; CHECK-NEXT: vmovups %zmm0, 4096(%rdi,%rax) {%k1}
+; CHECK-NEXT: vmovups 4096(%rdi,%rax), %zmm1
+; CHECK-NEXT: vmaxps %zmm0, %zmm1, %zmm1
+; CHECK-NEXT: vmovups %zmm1, 4096(%rdi,%rax)
; CHECK-NEXT: addq $64, %rax
; CHECK-NEXT: jne .LBB62_1
; CHECK-NEXT: # %bb.2: # %bb10
@@ -2073,8 +2076,9 @@ define void @bcast_unfold_fmax_v2f64(ptr %arg) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB63_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vcmpnltpd 8192(%rdi,%rax), %xmm0, %k1
-; CHECK-NEXT: vmovupd %xmm0, 8192(%rdi,%rax) {%k1}
+; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %xmm1
+; CHECK-NEXT: vmaxpd %xmm0, %xmm1, %xmm1
+; CHECK-NEXT: vmovupd %xmm1, 8192(%rdi,%rax)
; CHECK-NEXT: addq $16, %rax
; CHECK-NEXT: jne .LBB63_1
; CHECK-NEXT: # %bb.2: # %bb10
@@ -2105,8 +2109,9 @@ define void @bcast_unfold_fmax_v4f64(ptr %arg) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB64_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vcmpnltpd 8192(%rdi,%rax), %ymm0, %k1
-; CHECK-NEXT: vmovupd %ymm0, 8192(%rdi,%rax) {%k1}
+; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %ymm1
+; CHECK-NEXT: vmaxpd %ymm0, %ymm1, %ymm1
+; CHECK-NEXT: vmovupd %ymm1, 8192(%rdi,%rax)
; CHECK-NEXT: addq $32, %rax
; CHECK-NEXT: jne .LBB64_1
; CHECK-NEXT: # %bb.2: # %bb10
@@ -2138,8 +2143,9 @@ define void @bcast_unfold_fmax_v8f64(ptr %arg) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB65_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vcmpnltpd 8192(%rdi,%rax), %zmm0, %k1
-; CHECK-NEXT: vmovupd %zmm0, 8192(%rdi,%rax) {%k1}
+; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %zmm1
+; CHECK-NEXT: vmaxpd %zmm0, %zmm1, %zmm1
+; CHECK-NEXT: vmovupd %zmm1, 8192(%rdi,%rax)
; CHECK-NEXT: addq $64, %rax
; CHECK-NEXT: jne .LBB65_1
; CHECK-NEXT: # %bb.2: # %bb10
@@ -2171,8 +2177,9 @@ define void @bcast_unfold_fmin_v4f32(ptr %arg) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB66_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vcmpngtps 4096(%rdi,%rax), %xmm0, %k1
-; CHECK-NEXT: vmovups %xmm0, 4096(%rdi,%rax) {%k1}
+; CHECK-NEXT: vmovups 4096(%rdi,%rax), %xmm1
+; CHECK-NEXT: vminps %xmm0, %xmm1, %xmm1
+; CHECK-NEXT: vmovups %xmm1, 4096(%rdi,%rax)
; CHECK-NEXT: addq $16, %rax
; CHECK-NEXT: jne .LBB66_1
; CHECK-NEXT: # %bb.2: # %bb10
@@ -2203,8 +2210,9 @@ define void @bcast_unfold_fmin_v8f32(ptr %arg) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB67_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vcmpngtps 4096(%rdi,%rax), %ymm0, %k1
-; CHECK-NEXT: vmovups %ymm0, 4096(%rdi,%rax) {%k1}
+; CHECK-NEXT: vmovups 4096(%rdi,%rax), %ymm1
+; CHECK-NEXT: vminps %ymm0, %ymm1, %ymm1
+; CHECK-NEXT: vmovups %ymm1, 4096(%rdi,%rax)
; CHECK-NEXT: addq $32, %rax
; CHECK-NEXT: jne .LBB67_1
; CHECK-NEXT: # %bb.2: # %bb10
@@ -2236,8 +2244,9 @@ define void @bcast_unfold_fmin_v16f32(ptr %arg) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB68_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vcmpngtps 4096(%rdi,%rax), %zmm0, %k1
-; CHECK-NEXT: vmovups %zmm0, 4096(%rdi,%rax) {%k1}
+; CHECK-NEXT: vmovups 4096(%rdi,%rax), %zmm1
+; CHECK-NEXT: vminps %zmm0, %zmm1, %zmm1
+; CHECK-NEXT: vmovups %zmm1, 4096(%rdi,%rax)
; CHECK-NEXT: addq $64, %rax
; CHECK-NEXT: jne .LBB68_1
; CHECK-NEXT: # %bb.2: # %bb10
@@ -2270,8 +2279,9 @@ define void @bcast_unfold_fmin_v2f64(ptr %arg) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB69_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vcmpngtpd 8192(%rdi,%rax), %xmm0, %k1
-; CHECK-NEXT: vmovupd %xmm0, 8192(%rdi,%rax) {%k1}
+; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %xmm1
+; CHECK-NEXT: vminpd %xmm0, %xmm1, %xmm1
+; CHECK-NEXT: vmovupd %xmm1, 8192(%rdi,%rax)
; CHECK-NEXT: addq $16, %rax
; CHECK-NEXT: jne .LBB69_1
; CHECK-NEXT: # %bb.2: # %bb10
@@ -2302,8 +2312,9 @@ define void @bcast_unfold_fmin_v4f64(ptr %arg) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB70_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vcmpngtpd 8192(%rdi,%rax), %ymm0, %k1
-; CHECK-NEXT: vmovupd %ymm0, 8192(%rdi,%rax) {%k1}
+; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %ymm1
+; CHECK-NEXT: vminpd %ymm0, %ymm1, %ymm1
+; CHECK-NEXT: vmovupd %ymm1, 8192(%rdi,%rax)
; CHECK-NEXT: addq $32, %rax
; CHECK-NEXT: jne .LBB70_1
; CHECK-NEXT: # %bb.2: # %bb10
@@ -2335,8 +2346,9 @@ define void @bcast_unfold_fmin_v8f64(ptr %arg) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB71_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vcmpngtpd 8192(%rdi,%rax), %zmm0, %k1
-; CHECK-NEXT: vmovupd %zmm0, 8192(%rdi,%rax) {%k1}
+; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %zmm1
+; CHECK-NEXT: vminpd %zmm0, %zmm1, %zmm1
+; CHECK-NEXT: vmovupd %zmm1, 8192(%rdi,%rax)
; CHECK-NEXT: addq $64, %rax
; CHECK-NEXT: jne .LBB71_1
; CHECK-NEXT: # %bb.2: # %bb10
diff --git a/llvm/test/CodeGen/X86/sse-minmax.ll b/llvm/test/CodeGen/X86/sse-minmax.ll
index 2b97f98450973..c1fb1c6b87a23 100644
--- a/llvm/test/CodeGen/X86/sse-minmax.ll
+++ b/llvm/test/CodeGen/X86/sse-minmax.ll
@@ -365,11 +365,8 @@ define double @ult_x(double %x) {
define double @ugt_inverse_x(double %x) {
; CHECK-LABEL: ugt_inverse_x:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorpd %xmm2, %xmm2
-; CHECK-NEXT: movapd %xmm0, %xmm1
-; CHECK-NEXT: cmpnlesd %xmm2, %xmm1
-; CHECK-NEXT: andnpd %xmm0, %xmm1
-; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
; CHECK-NEXT: retq
%c = fcmp ugt double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
@@ -380,9 +377,7 @@ define double @ult_inverse_x(double %x) {
; CHECK-LABEL: ult_inverse_x:
; CHECK: # %bb.0:
; CHECK-NEXT: xorpd %xmm1, %xmm1
-; CHECK-NEXT: cmpnlesd %xmm0, %xmm1
-; CHECK-NEXT: andnpd %xmm0, %xmm1
-; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: maxsd %xmm1, %xmm0
; CHECK-NEXT: retq
%c = fcmp ult double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
@@ -583,10 +578,7 @@ define double @ult_y(double %x) {
define double @ugt_inverse_y(double %x) {
; CHECK-LABEL: ugt_inverse_y:
; CHECK: # %bb.0:
-; CHECK-NEXT: movapd %xmm0, %xmm1
-; CHECK-NEXT: cmpnlesd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: blendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
%c = fcmp ugt double %x, -0.000000e+00
%d = select i1 %c, double -0.000000e+00, double %x
@@ -596,11 +588,7 @@ define double @ugt_inverse_y(double %x) {
define double @ult_inverse_y(double %x) {
; CHECK-LABEL: ult_inverse_y:
; CHECK: # %bb.0:
-; CHECK-NEXT: movapd %xmm0, %xmm1
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0]
-; CHECK-NEXT: cmpnlesd %xmm1, %xmm0
-; CHECK-NEXT: blendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
%c = fcmp ult double %x, -0.000000e+00
%d = select i1 %c, double -0.000000e+00, double %x
More information about the llvm-commits
mailing list