[llvm] 0418700 - [SDAGBuilder] Fix incorrect fcmp+select to minnum/maxnum transform (#184590)

via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 5 00:47:08 PST 2026


Author: Nikita Popov
Date: 2026-03-05T08:47:01Z
New Revision: 0418700f047950e71b5c94a31f2ed76f976e6090

URL: https://github.com/llvm/llvm-project/commit/0418700f047950e71b5c94a31f2ed76f976e6090
DIFF: https://github.com/llvm/llvm-project/commit/0418700f047950e71b5c94a31f2ed76f976e6090.diff

LOG: [SDAGBuilder] Fix incorrect fcmp+select to minnum/maxnum transform (#184590)

minnum/maxnum don't have the correct sNaN semantics, we must convert to
minimumnum/maximumnum instead.

To avoid an NVPTX regression, make it handle fmaximumnum in one
TableGen pattern.

This is intended as a targeted fix for the miscompile, as the complete
removal of this transform (#93575) appears to be blocked.

Fixes https://github.com/llvm/llvm-project/issues/176624.

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
    llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
    llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
    llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
    llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
    llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
    llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
    llvm/test/CodeGen/X86/sse-minmax.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 3e6fdd7bbf9fe..6ed4cd04d06f1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3878,7 +3878,10 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
       switch (SPR.NaNBehavior) {
       case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
       case SPNB_RETURNS_NAN: break;
-      case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
+      case SPNB_RETURNS_OTHER:
+        Opc = ISD::FMINIMUMNUM;
+        Flags.setNoSignedZeros(true);
+        break;
       case SPNB_RETURNS_ANY:
         if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ||
             (UseScalarMinMax &&
@@ -3891,7 +3894,10 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
       switch (SPR.NaNBehavior) {
       case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
       case SPNB_RETURNS_NAN: break;
-      case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
+      case SPNB_RETURNS_OTHER:
+        Opc = ISD::FMAXIMUMNUM;
+        Flags.setNoSignedZeros(true);
+        break;
       case SPNB_RETURNS_ANY:
         if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) ||
             (UseScalarMinMax &&

diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 096c5e470ed02..5019d76039a4d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2553,10 +2553,10 @@ def NVPTX_fma_oneuse_and_nnan : PatFrag<(ops node:$a, node:$b, node:$c),
   return N->hasOneUse() &&
     (N->getFlags().hasNoNaNs() || TM.Options.NoNaNsFPMath);
 }]>;
-// fmaxnum will differentiate between signed and unsigned zeros soon, so this
-// PatFrag is for a fmaxnum node with nsz
-def NVPTX_fmaxnum_nsz : PatFrag<(ops node:$a, node:$b),
-                                  (fmaxnum node:$a, node:$b), [{
+// fmaxnum/fmaximumnum differentiate between signed and unsigned zeros, so this
+// PatFrag is for a fmaxnum/fmaximumnum node with nsz
+def NVPTX_fmaxnum_or_fmaximumnum_nsz : PatFrag<(ops node:$a, node:$b),
+                                  (fmaxnum_or_fmaximumnum node:$a, node:$b), [{
   return N->getFlags().hasNoSignedZeros();
 }]>;
 
@@ -2564,7 +2564,7 @@ class FMARELUInst<RegTyInfo t, bit allow_ftz, PatFrag zero_pat>
   : BasicFlagsNVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b, t.RC:$c),
                    !if(allow_ftz, (ins FTZFlag:$ftz), (ins)),
                    "fma.rn" # !if(allow_ftz, "$ftz", "") # ".relu." # t.PtxType,
-                   [(set t.Ty:$dst, (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan t.Ty:$a, t.Ty:$b, t.Ty:$c), zero_pat))]>;
+                   [(set t.Ty:$dst, (NVPTX_fmaxnum_or_fmaximumnum_nsz (NVPTX_fma_oneuse_and_nnan t.Ty:$a, t.Ty:$b, t.Ty:$c), zero_pat))]>;
 
 let Predicates = [useFP16Math, hasPTX<70>, hasSM<80>] in {
   def FMARELU_F16 : FMARELUInst<F16RT, true, fpimm_0>;

diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
index 159075db0f7bc..4e3fac3650e98 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
@@ -31,12 +31,14 @@ define double @test_cross(float %in) {
 }
 
 ; Same as previous, but with ordered comparison;
-; must become fminnm, not fmin.
+; Should not become either fminnm or fmin, because neither have the correct
+; behavior for sNaN.
 define double @test_cross_fail_nan(float %in) {
 ; CHECK-LABEL: test_cross_fail_nan:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi d1, #0000000000000000
-; CHECK-NEXT:    fminnm s0, s0, s1
+; CHECK-NEXT:    fcmp s0, #0.0
+; CHECK-NEXT:    fcsel s0, s0, s1, mi
 ; CHECK-NEXT:    fcvt d0, s0
 ; CHECK-NEXT:    ret
   %cmp = fcmp olt float %in, 0.000000e+00

diff --git a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
index 2185bd8a2a138..9e4685f2081df 100644
--- a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
+++ b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
@@ -201,7 +201,9 @@ define half @fp16_vminnm_NNNo(half %a) {
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmov.f16 s0, r0
 ; CHECK-NEXT:    vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT:    vminnm.f16 s0, s0, s2
+; CHECK-NEXT:    vcmp.f16 s2, s0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselgt.f16 s0, s0, s2
 ; CHECK-NEXT:    vldr.16 s2, .LCPI12_0
 ; CHECK-NEXT:    vcmp.f16 s0, s2
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -229,7 +231,9 @@ define half @fp16_vminnm_NNNo_rev(half %a) {
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vselgt.f16 s0, s2, s0
 ; CHECK-NEXT:    vldr.16 s2, .LCPI13_1
-; CHECK-NEXT:    vminnm.f16 s0, s0, s2
+; CHECK-NEXT:    vcmp.f16 s2, s0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselgt.f16 s0, s0, s2
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 1
@@ -251,11 +255,13 @@ define half @fp16_vminnm_NNNu(half %b) {
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmov.f16 s0, r0
 ; CHECK-NEXT:    vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT:    vminnm.f16 s0, s0, s2
-; CHECK-NEXT:    vldr.16 s2, .LCPI14_0
 ; CHECK-NEXT:    vcmp.f16 s2, s0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vselgt.f16 s0, s0, s2
+; CHECK-NEXT:    vselge.f16 s0, s0, s2
+; CHECK-NEXT:    vldr.16 s2, .LCPI14_0
+; CHECK-NEXT:    vcmp.f16 s0, s2
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselge.f16 s0, s2, s0
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 1
@@ -275,11 +281,13 @@ define half @fp16_vminnm_NNNule(half %b) {
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vldr.16 s2, .LCPI15_0
 ; CHECK-NEXT:    vmov.f16 s0, r0
-; CHECK-NEXT:    vminnm.f16 s0, s0, s2
-; CHECK-NEXT:    vldr.16 s2, .LCPI15_1
 ; CHECK-NEXT:    vcmp.f16 s2, s0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vselge.f16 s0, s0, s2
+; CHECK-NEXT:    vselgt.f16 s0, s0, s2
+; CHECK-NEXT:    vldr.16 s2, .LCPI15_1
+; CHECK-NEXT:    vcmp.f16 s0, s2
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselgt.f16 s0, s2, s0
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 1
@@ -306,7 +314,9 @@ define half @fp16_vminnm_NNNu_rev(half %b) {
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vselge.f16 s0, s2, s0
 ; CHECK-NEXT:    vldr.16 s2, .LCPI16_1
-; CHECK-NEXT:    vminnm.f16 s0, s0, s2
+; CHECK-NEXT:    vcmp.f16 s2, s0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselge.f16 s0, s0, s2
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 1
@@ -330,7 +340,9 @@ define half @fp16_vmaxnm_NNNo(half %a) {
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmov.f16 s0, r0
 ; CHECK-NEXT:    vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT:    vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT:    vcmp.f16 s0, s2
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselgt.f16 s0, s0, s2
 ; CHECK-NEXT:    vldr.16 s2, .LCPI17_0
 ; CHECK-NEXT:    vcmp.f16 s2, s0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -354,7 +366,9 @@ define half @fp16_vmaxnm_NNNoge(half %a) {
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vldr.16 s2, .LCPI18_0
 ; CHECK-NEXT:    vmov.f16 s0, r0
-; CHECK-NEXT:    vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT:    vcmp.f16 s0, s2
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselge.f16 s0, s0, s2
 ; CHECK-NEXT:    vldr.16 s2, .LCPI18_1
 ; CHECK-NEXT:    vcmp.f16 s2, s0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -384,7 +398,9 @@ define half @fp16_vmaxnm_NNNo_rev(half %a) {
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vselgt.f16 s0, s2, s0
 ; CHECK-NEXT:    vldr.16 s2, .LCPI19_1
-; CHECK-NEXT:    vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT:    vcmp.f16 s0, s2
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselgt.f16 s0, s0, s2
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 1
@@ -410,7 +426,9 @@ define half @fp16_vmaxnm_NNNole_rev(half %a) {
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vselge.f16 s0, s2, s0
 ; CHECK-NEXT:    vldr.16 s2, .LCPI20_1
-; CHECK-NEXT:    vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT:    vcmp.f16 s0, s2
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselge.f16 s0, s0, s2
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 1
@@ -432,11 +450,13 @@ define half @fp16_vmaxnm_NNNu(half %b) {
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmov.f16 s0, r0
 ; CHECK-NEXT:    vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT:    vmaxnm.f16 s0, s0, s2
-; CHECK-NEXT:    vldr.16 s2, .LCPI21_0
 ; CHECK-NEXT:    vcmp.f16 s0, s2
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vselgt.f16 s0, s0, s2
+; CHECK-NEXT:    vselge.f16 s0, s0, s2
+; CHECK-NEXT:    vldr.16 s2, .LCPI21_0
+; CHECK-NEXT:    vcmp.f16 s2, s0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselge.f16 s0, s2, s0
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 1
@@ -456,11 +476,13 @@ define half @fp16_vmaxnm_NNNuge(half %b) {
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vldr.16 s2, .LCPI22_0
 ; CHECK-NEXT:    vmov.f16 s0, r0
-; CHECK-NEXT:    vmaxnm.f16 s0, s0, s2
-; CHECK-NEXT:    vldr.16 s2, .LCPI22_1
 ; CHECK-NEXT:    vcmp.f16 s0, s2
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vselge.f16 s0, s0, s2
+; CHECK-NEXT:    vselgt.f16 s0, s0, s2
+; CHECK-NEXT:    vldr.16 s2, .LCPI22_1
+; CHECK-NEXT:    vcmp.f16 s2, s0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselgt.f16 s0, s2, s0
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 1
@@ -480,12 +502,14 @@ entry:
 define half @fp16_vminmaxnm_neg0(half %a) {
 ; CHECK-LABEL: fp16_vminmaxnm_neg0:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldr.16 s0, .LCPI23_0
-; CHECK-NEXT:    vmov.f16 s2, r0
-; CHECK-NEXT:    vminnm.f16 s2, s2, s0
+; CHECK-NEXT:    vldr.16 s2, .LCPI23_0
+; CHECK-NEXT:    vmov.f16 s0, r0
 ; CHECK-NEXT:    vcmp.f16 s2, s0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vselgt.f16 s0, s2, s0
+; CHECK-NEXT:    vselgt.f16 s0, s0, s2
+; CHECK-NEXT:    vcmp.f16 s2, s0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselge.f16 s0, s2, s0
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 1
@@ -508,7 +532,9 @@ define half @fp16_vminmaxnm_e_0(half %a) {
 ; CHECK-NEXT:    vcmp.f16 s0, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vselge.f16 s0, s2, s0
-; CHECK-NEXT:    vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT:    vcmp.f16 s0, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselgt.f16 s0, s0, s2
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 1
@@ -526,12 +552,14 @@ entry:
 define half @fp16_vminmaxnm_e_neg0(half %a) {
 ; CHECK-LABEL: fp16_vminmaxnm_e_neg0:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldr.16 s0, .LCPI25_0
-; CHECK-NEXT:    vmov.f16 s2, r0
-; CHECK-NEXT:    vminnm.f16 s2, s2, s0
-; CHECK-NEXT:    vcmp.f16 s0, s2
+; CHECK-NEXT:    vldr.16 s2, .LCPI25_0
+; CHECK-NEXT:    vmov.f16 s0, r0
+; CHECK-NEXT:    vcmp.f16 s2, s0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vselge.f16 s0, s0, s2
+; CHECK-NEXT:    vselgt.f16 s0, s0, s2
+; CHECK-NEXT:    vcmp.f16 s2, s0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselge.f16 s0, s2, s0
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 1

diff --git a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
index 8564d7d9996d3..01cb3ba404816 100644
--- a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
@@ -1300,9 +1300,9 @@ define void @pr65820(ptr %y, <4 x float> %splat) {
 ; ARMV8-LABEL: pr65820:
 ; ARMV8:       @ %bb.0: @ %entry
 ; ARMV8-NEXT:    vmov d16, r2, r3
-; ARMV8-NEXT:    vmov.i32 q9, #0x0
 ; ARMV8-NEXT:    vdup.32 q8, d16[0]
-; ARMV8-NEXT:    vmaxnm.f32 q8, q8, q9
+; ARMV8-NEXT:    vcgt.f32 q9, q8, #0
+; ARMV8-NEXT:    vand q8, q9, q8
 ; ARMV8-NEXT:    vst1.32 {d16, d17}, [r0]
 ; ARMV8-NEXT:    bx lr
 ;
@@ -1312,7 +1312,8 @@ define void @pr65820(ptr %y, <4 x float> %splat) {
 ; ARMV8M-NEXT:    vmov r1, s0
 ; ARMV8M-NEXT:    vmov.i32 q0, #0x0
 ; ARMV8M-NEXT:    vdup.32 q1, r1
-; ARMV8M-NEXT:    vmaxnm.f32 q0, q1, q0
+; ARMV8M-NEXT:    vcmp.f32 gt, q1, zr
+; ARMV8M-NEXT:    vdupt.32 q0, r1
 ; ARMV8M-NEXT:    vstrw.32 q0, [r0]
 ; ARMV8M-NEXT:    bx lr
 entry:

diff --git a/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll b/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
index 9d0fef6452a38..04c11c35f99e5 100644
--- a/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
+++ b/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
@@ -309,12 +309,14 @@ define float @fp_armv8_vminnm_NNNo(float %a) {
 ; CHECK-LABEL: fp_armv8_vminnm_NNNo:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT:    vldr s2, .LCPI20_0
-; CHECK-NEXT:    vmov s4, r0
-; CHECK-NEXT:    vminnm.f32 s0, s4, s0
+; CHECK-NEXT:    vldr s4, .LCPI20_0
+; CHECK-NEXT:    vmov s2, r0
 ; CHECK-NEXT:    vcmp.f32 s0, s2
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vselgt.f32 s0, s2, s0
+; CHECK-NEXT:    vcmp.f32 s0, s4
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselgt.f32 s0, s4, s0
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 2
@@ -331,13 +333,15 @@ define float @fp_armv8_vminnm_NNNo(float %a) {
 define double @fp_armv8_vminnm_NNNole(double %a) {
 ; CHECK-LABEL: fp_armv8_vminnm_NNNole:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vldr d16, .LCPI21_0
-; CHECK-NEXT:    vmov d18, r0, r1
-; CHECK-NEXT:    vldr d17, .LCPI21_1
-; CHECK-NEXT:    vminnm.f64 d16, d18, d16
-; CHECK-NEXT:    vcmp.f64 d16, d17
+; CHECK-NEXT:    vmov d16, r0, r1
+; CHECK-NEXT:    vldr d17, .LCPI21_0
+; CHECK-NEXT:    vldr d18, .LCPI21_1
+; CHECK-NEXT:    vcmp.f64 d17, d16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vselge.f64 d16, d17, d16
+; CHECK-NEXT:    vselge.f64 d16, d16, d17
+; CHECK-NEXT:    vcmp.f64 d16, d18
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselge.f64 d16, d18, d16
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 3
@@ -364,7 +368,9 @@ define float @fp_armv8_vminnm_NNNo_rev(float %a) {
 ; CHECK-NEXT:    vcmp.f32 s2, s0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vselgt.f32 s0, s0, s2
-; CHECK-NEXT:    vminnm.f32 s0, s0, s4
+; CHECK-NEXT:    vcmp.f32 s4, s0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselgt.f32 s0, s0, s4
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 2
@@ -389,7 +395,9 @@ define double @fp_armv8_vminnm_NNNoge_rev(double %a) {
 ; CHECK-NEXT:    vcmp.f64 d17, d16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vselge.f64 d16, d16, d17
-; CHECK-NEXT:    vminnm.f64 d16, d16, d18
+; CHECK-NEXT:    vcmp.f64 d18, d16
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselge.f64 d16, d16, d18
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 3
@@ -411,12 +419,14 @@ define float @fp_armv8_vminnm_NNNu(float %b) {
 ; CHECK-LABEL: fp_armv8_vminnm_NNNu:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT:    vldr s2, .LCPI24_0
-; CHECK-NEXT:    vmov s4, r0
-; CHECK-NEXT:    vminnm.f32 s0, s4, s0
-; CHECK-NEXT:    vcmp.f32 s2, s0
+; CHECK-NEXT:    vldr s4, .LCPI24_0
+; CHECK-NEXT:    vmov s2, r0
+; CHECK-NEXT:    vcmp.f32 s0, s2
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vselgt.f32 s0, s0, s2
+; CHECK-NEXT:    vselge.f32 s0, s2, s0
+; CHECK-NEXT:    vcmp.f32 s0, s4
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselge.f32 s0, s4, s0
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 2
@@ -434,12 +444,14 @@ define float @fp_armv8_vminnm_NNNule(float %b) {
 ; CHECK-LABEL: fp_armv8_vminnm_NNNule:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vldr s0, .LCPI25_0
-; CHECK-NEXT:    vmov s4, r0
-; CHECK-NEXT:    vldr s2, .LCPI25_1
-; CHECK-NEXT:    vminnm.f32 s0, s4, s0
-; CHECK-NEXT:    vcmp.f32 s2, s0
+; CHECK-NEXT:    vmov s2, r0
+; CHECK-NEXT:    vldr s4, .LCPI25_1
+; CHECK-NEXT:    vcmp.f32 s0, s2
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vselge.f32 s0, s0, s2
+; CHECK-NEXT:    vselgt.f32 s0, s2, s0
+; CHECK-NEXT:    vcmp.f32 s0, s4
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselgt.f32 s0, s4, s0
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 2
@@ -464,7 +476,9 @@ define float @fp_armv8_vminnm_NNNu_rev(float %b) {
 ; CHECK-NEXT:    vcmp.f32 s2, s0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vselge.f32 s0, s0, s2
-; CHECK-NEXT:    vminnm.f32 s0, s0, s4
+; CHECK-NEXT:    vcmp.f32 s4, s0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselge.f32 s0, s0, s4
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 2
@@ -489,7 +503,9 @@ define double @fp_armv8_vminnm_NNNuge_rev(double %b) {
 ; CHECK-NEXT:    vcmp.f64 d17, d16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vselgt.f64 d16, d16, d17
-; CHECK-NEXT:    vminnm.f64 d16, d16, d18
+; CHECK-NEXT:    vcmp.f64 d18, d16
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselgt.f64 d16, d16, d18
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 3
@@ -511,12 +527,14 @@ define float @fp_armv8_vmaxnm_NNNo(float %a) {
 ; CHECK-LABEL: fp_armv8_vmaxnm_NNNo:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT:    vldr s2, .LCPI28_0
-; CHECK-NEXT:    vmov s4, r0
-; CHECK-NEXT:    vmaxnm.f32 s0, s4, s0
+; CHECK-NEXT:    vldr s4, .LCPI28_0
+; CHECK-NEXT:    vmov s2, r0
 ; CHECK-NEXT:    vcmp.f32 s2, s0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vselgt.f32 s0, s2, s0
+; CHECK-NEXT:    vcmp.f32 s4, s0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselgt.f32 s0, s4, s0
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 2
@@ -534,12 +552,14 @@ define float @fp_armv8_vmaxnm_NNNoge(float %a) {
 ; CHECK-LABEL: fp_armv8_vmaxnm_NNNoge:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vldr s0, .LCPI29_0
-; CHECK-NEXT:    vmov s4, r0
-; CHECK-NEXT:    vldr s2, .LCPI29_1
-; CHECK-NEXT:    vmaxnm.f32 s0, s4, s0
+; CHECK-NEXT:    vmov s2, r0
+; CHECK-NEXT:    vldr s4, .LCPI29_1
 ; CHECK-NEXT:    vcmp.f32 s2, s0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vselge.f32 s0, s2, s0
+; CHECK-NEXT:    vcmp.f32 s4, s0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselge.f32 s0, s4, s0
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 2
@@ -564,7 +584,9 @@ define float @fp_armv8_vmaxnm_NNNo_rev(float %a) {
 ; CHECK-NEXT:    vcmp.f32 s0, s2
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vselgt.f32 s0, s0, s2
-; CHECK-NEXT:    vmaxnm.f32 s0, s0, s4
+; CHECK-NEXT:    vcmp.f32 s0, s4
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselgt.f32 s0, s0, s4
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 2
@@ -589,7 +611,9 @@ define float @fp_armv8_vmaxnm_NNNole_rev(float %a) {
 ; CHECK-NEXT:    vcmp.f32 s0, s2
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vselge.f32 s0, s0, s2
-; CHECK-NEXT:    vmaxnm.f32 s0, s0, s4
+; CHECK-NEXT:    vcmp.f32 s0, s4
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselge.f32 s0, s0, s4
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 2
@@ -609,12 +633,14 @@ define float @fp_armv8_vmaxnm_NNNu(float %b) {
 ; CHECK-LABEL: fp_armv8_vmaxnm_NNNu:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT:    vldr s2, .LCPI32_0
-; CHECK-NEXT:    vmov s4, r0
-; CHECK-NEXT:    vmaxnm.f32 s0, s4, s0
-; CHECK-NEXT:    vcmp.f32 s0, s2
+; CHECK-NEXT:    vldr s4, .LCPI32_0
+; CHECK-NEXT:    vmov s2, r0
+; CHECK-NEXT:    vcmp.f32 s2, s0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vselgt.f32 s0, s0, s2
+; CHECK-NEXT:    vselge.f32 s0, s2, s0
+; CHECK-NEXT:    vcmp.f32 s4, s0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselge.f32 s0, s4, s0
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 2
@@ -632,12 +658,14 @@ define float @fp_armv8_vmaxnm_NNNuge(float %b) {
 ; CHECK-LABEL: fp_armv8_vmaxnm_NNNuge:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vldr s0, .LCPI33_0
-; CHECK-NEXT:    vmov s4, r0
-; CHECK-NEXT:    vldr s2, .LCPI33_1
-; CHECK-NEXT:    vmaxnm.f32 s0, s4, s0
-; CHECK-NEXT:    vcmp.f32 s0, s2
+; CHECK-NEXT:    vmov s2, r0
+; CHECK-NEXT:    vldr s4, .LCPI33_1
+; CHECK-NEXT:    vcmp.f32 s2, s0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vselge.f32 s0, s0, s2
+; CHECK-NEXT:    vselgt.f32 s0, s2, s0
+; CHECK-NEXT:    vcmp.f32 s4, s0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselgt.f32 s0, s4, s0
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 2
@@ -662,7 +690,9 @@ define float @fp_armv8_vmaxnm_NNNu_rev(float %b) {
 ; CHECK-NEXT:    vcmp.f32 s0, s2
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vselge.f32 s0, s0, s2
-; CHECK-NEXT:    vmaxnm.f32 s0, s0, s4
+; CHECK-NEXT:    vcmp.f32 s0, s4
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselge.f32 s0, s0, s4
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 2
@@ -687,7 +717,9 @@ define double @fp_armv8_vmaxnm_NNNule_rev( double %b) {
 ; CHECK-NEXT:    vcmp.f64 d17, d16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vselgt.f64 d16, d17, d16
-; CHECK-NEXT:    vmaxnm.f64 d16, d16, d18
+; CHECK-NEXT:    vcmp.f64 d16, d18
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselgt.f64 d16, d16, d18
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 3
@@ -714,7 +746,9 @@ define float @fp_armv8_vminmaxnm_0(float %a) {
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vmov.f32 s4, s2
 ; CHECK-NEXT:    vmovlt.f32 s4, s0
-; CHECK-NEXT:    vmaxnm.f32 s0, s4, s2
+; CHECK-NEXT:    vcmp.f32 s4, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselgt.f32 s0, s4, s2
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 2
@@ -733,10 +767,12 @@ define float @fp_armv8_vminmaxnm_neg0(float %a) {
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vldr s0, .LCPI37_0
 ; CHECK-NEXT:    vmov s2, r0
-; CHECK-NEXT:    vminnm.f32 s2, s2, s0
-; CHECK-NEXT:    vcmp.f32 s2, s0
+; CHECK-NEXT:    vcmp.f32 s0, s2
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vselgt.f32 s0, s2, s0
+; CHECK-NEXT:    vselgt.f32 s2, s2, s0
+; CHECK-NEXT:    vcmp.f32 s0, s2
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselge.f32 s0, s0, s2
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 2
@@ -758,7 +794,9 @@ define float @fp_armv8_vminmaxnm_e_0(float %a) {
 ; CHECK-NEXT:    vcmp.f32 s0, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vselge.f32 s0, s2, s0
-; CHECK-NEXT:    vmaxnm.f32 s0, s0, s2
+; CHECK-NEXT:    vcmp.f32 s0, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vmovle.f32 s0, s2
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 2
@@ -777,7 +815,9 @@ define float @fp_armv8_vminmaxnm_e_neg0(float %a) {
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vldr s0, .LCPI39_0
 ; CHECK-NEXT:    vmov s2, r0
-; CHECK-NEXT:    vminnm.f32 s2, s2, s0
+; CHECK-NEXT:    vcmp.f32 s0, s2
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vselgt.f32 s2, s2, s0
 ; CHECK-NEXT:    vcmp.f32 s0, s2
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vselge.f32 s0, s0, s2

diff --git a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
index 2f86499a2df9e..4f1ba8d35e5f1 100644
--- a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
+++ b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
@@ -1974,8 +1974,9 @@ define void @bcast_unfold_fmax_v4f32(ptr %arg) {
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB60_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vcmpnltps 4096(%rdi,%rax), %xmm0, %k1
-; CHECK-NEXT:    vmovups %xmm0, 4096(%rdi,%rax) {%k1}
+; CHECK-NEXT:    vmovups 4096(%rdi,%rax), %xmm1
+; CHECK-NEXT:    vmaxps %xmm0, %xmm1, %xmm1
+; CHECK-NEXT:    vmovups %xmm1, 4096(%rdi,%rax)
 ; CHECK-NEXT:    addq $16, %rax
 ; CHECK-NEXT:    jne .LBB60_1
 ; CHECK-NEXT:  # %bb.2: # %bb10
@@ -2006,8 +2007,9 @@ define void @bcast_unfold_fmax_v8f32(ptr %arg) {
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB61_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vcmpnltps 4096(%rdi,%rax), %ymm0, %k1
-; CHECK-NEXT:    vmovups %ymm0, 4096(%rdi,%rax) {%k1}
+; CHECK-NEXT:    vmovups 4096(%rdi,%rax), %ymm1
+; CHECK-NEXT:    vmaxps %ymm0, %ymm1, %ymm1
+; CHECK-NEXT:    vmovups %ymm1, 4096(%rdi,%rax)
 ; CHECK-NEXT:    addq $32, %rax
 ; CHECK-NEXT:    jne .LBB61_1
 ; CHECK-NEXT:  # %bb.2: # %bb10
@@ -2039,8 +2041,9 @@ define void @bcast_unfold_fmax_v16f32(ptr %arg) {
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB62_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vcmpnltps 4096(%rdi,%rax), %zmm0, %k1
-; CHECK-NEXT:    vmovups %zmm0, 4096(%rdi,%rax) {%k1}
+; CHECK-NEXT:    vmovups 4096(%rdi,%rax), %zmm1
+; CHECK-NEXT:    vmaxps %zmm0, %zmm1, %zmm1
+; CHECK-NEXT:    vmovups %zmm1, 4096(%rdi,%rax)
 ; CHECK-NEXT:    addq $64, %rax
 ; CHECK-NEXT:    jne .LBB62_1
 ; CHECK-NEXT:  # %bb.2: # %bb10
@@ -2073,8 +2076,9 @@ define void @bcast_unfold_fmax_v2f64(ptr %arg) {
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB63_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vcmpnltpd 8192(%rdi,%rax), %xmm0, %k1
-; CHECK-NEXT:    vmovupd %xmm0, 8192(%rdi,%rax) {%k1}
+; CHECK-NEXT:    vmovupd 8192(%rdi,%rax), %xmm1
+; CHECK-NEXT:    vmaxpd %xmm0, %xmm1, %xmm1
+; CHECK-NEXT:    vmovupd %xmm1, 8192(%rdi,%rax)
 ; CHECK-NEXT:    addq $16, %rax
 ; CHECK-NEXT:    jne .LBB63_1
 ; CHECK-NEXT:  # %bb.2: # %bb10
@@ -2105,8 +2109,9 @@ define void @bcast_unfold_fmax_v4f64(ptr %arg) {
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB64_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vcmpnltpd 8192(%rdi,%rax), %ymm0, %k1
-; CHECK-NEXT:    vmovupd %ymm0, 8192(%rdi,%rax) {%k1}
+; CHECK-NEXT:    vmovupd 8192(%rdi,%rax), %ymm1
+; CHECK-NEXT:    vmaxpd %ymm0, %ymm1, %ymm1
+; CHECK-NEXT:    vmovupd %ymm1, 8192(%rdi,%rax)
 ; CHECK-NEXT:    addq $32, %rax
 ; CHECK-NEXT:    jne .LBB64_1
 ; CHECK-NEXT:  # %bb.2: # %bb10
@@ -2138,8 +2143,9 @@ define void @bcast_unfold_fmax_v8f64(ptr %arg) {
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB65_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vcmpnltpd 8192(%rdi,%rax), %zmm0, %k1
-; CHECK-NEXT:    vmovupd %zmm0, 8192(%rdi,%rax) {%k1}
+; CHECK-NEXT:    vmovupd 8192(%rdi,%rax), %zmm1
+; CHECK-NEXT:    vmaxpd %zmm0, %zmm1, %zmm1
+; CHECK-NEXT:    vmovupd %zmm1, 8192(%rdi,%rax)
 ; CHECK-NEXT:    addq $64, %rax
 ; CHECK-NEXT:    jne .LBB65_1
 ; CHECK-NEXT:  # %bb.2: # %bb10
@@ -2171,8 +2177,9 @@ define void @bcast_unfold_fmin_v4f32(ptr %arg) {
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB66_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vcmpngtps 4096(%rdi,%rax), %xmm0, %k1
-; CHECK-NEXT:    vmovups %xmm0, 4096(%rdi,%rax) {%k1}
+; CHECK-NEXT:    vmovups 4096(%rdi,%rax), %xmm1
+; CHECK-NEXT:    vminps %xmm0, %xmm1, %xmm1
+; CHECK-NEXT:    vmovups %xmm1, 4096(%rdi,%rax)
 ; CHECK-NEXT:    addq $16, %rax
 ; CHECK-NEXT:    jne .LBB66_1
 ; CHECK-NEXT:  # %bb.2: # %bb10
@@ -2203,8 +2210,9 @@ define void @bcast_unfold_fmin_v8f32(ptr %arg) {
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB67_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vcmpngtps 4096(%rdi,%rax), %ymm0, %k1
-; CHECK-NEXT:    vmovups %ymm0, 4096(%rdi,%rax) {%k1}
+; CHECK-NEXT:    vmovups 4096(%rdi,%rax), %ymm1
+; CHECK-NEXT:    vminps %ymm0, %ymm1, %ymm1
+; CHECK-NEXT:    vmovups %ymm1, 4096(%rdi,%rax)
 ; CHECK-NEXT:    addq $32, %rax
 ; CHECK-NEXT:    jne .LBB67_1
 ; CHECK-NEXT:  # %bb.2: # %bb10
@@ -2236,8 +2244,9 @@ define void @bcast_unfold_fmin_v16f32(ptr %arg) {
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB68_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vcmpngtps 4096(%rdi,%rax), %zmm0, %k1
-; CHECK-NEXT:    vmovups %zmm0, 4096(%rdi,%rax) {%k1}
+; CHECK-NEXT:    vmovups 4096(%rdi,%rax), %zmm1
+; CHECK-NEXT:    vminps %zmm0, %zmm1, %zmm1
+; CHECK-NEXT:    vmovups %zmm1, 4096(%rdi,%rax)
 ; CHECK-NEXT:    addq $64, %rax
 ; CHECK-NEXT:    jne .LBB68_1
 ; CHECK-NEXT:  # %bb.2: # %bb10
@@ -2270,8 +2279,9 @@ define void @bcast_unfold_fmin_v2f64(ptr %arg) {
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB69_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vcmpngtpd 8192(%rdi,%rax), %xmm0, %k1
-; CHECK-NEXT:    vmovupd %xmm0, 8192(%rdi,%rax) {%k1}
+; CHECK-NEXT:    vmovupd 8192(%rdi,%rax), %xmm1
+; CHECK-NEXT:    vminpd %xmm0, %xmm1, %xmm1
+; CHECK-NEXT:    vmovupd %xmm1, 8192(%rdi,%rax)
 ; CHECK-NEXT:    addq $16, %rax
 ; CHECK-NEXT:    jne .LBB69_1
 ; CHECK-NEXT:  # %bb.2: # %bb10
@@ -2302,8 +2312,9 @@ define void @bcast_unfold_fmin_v4f64(ptr %arg) {
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB70_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vcmpngtpd 8192(%rdi,%rax), %ymm0, %k1
-; CHECK-NEXT:    vmovupd %ymm0, 8192(%rdi,%rax) {%k1}
+; CHECK-NEXT:    vmovupd 8192(%rdi,%rax), %ymm1
+; CHECK-NEXT:    vminpd %ymm0, %ymm1, %ymm1
+; CHECK-NEXT:    vmovupd %ymm1, 8192(%rdi,%rax)
 ; CHECK-NEXT:    addq $32, %rax
 ; CHECK-NEXT:    jne .LBB70_1
 ; CHECK-NEXT:  # %bb.2: # %bb10
@@ -2335,8 +2346,9 @@ define void @bcast_unfold_fmin_v8f64(ptr %arg) {
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB71_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vcmpngtpd 8192(%rdi,%rax), %zmm0, %k1
-; CHECK-NEXT:    vmovupd %zmm0, 8192(%rdi,%rax) {%k1}
+; CHECK-NEXT:    vmovupd 8192(%rdi,%rax), %zmm1
+; CHECK-NEXT:    vminpd %zmm0, %zmm1, %zmm1
+; CHECK-NEXT:    vmovupd %zmm1, 8192(%rdi,%rax)
 ; CHECK-NEXT:    addq $64, %rax
 ; CHECK-NEXT:    jne .LBB71_1
 ; CHECK-NEXT:  # %bb.2: # %bb10

diff --git a/llvm/test/CodeGen/X86/sse-minmax.ll b/llvm/test/CodeGen/X86/sse-minmax.ll
index 2b97f98450973..c1fb1c6b87a23 100644
--- a/llvm/test/CodeGen/X86/sse-minmax.ll
+++ b/llvm/test/CodeGen/X86/sse-minmax.ll
@@ -365,11 +365,8 @@ define double @ult_x(double %x)  {
 define double @ugt_inverse_x(double %x)  {
 ; CHECK-LABEL: ugt_inverse_x:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorpd %xmm2, %xmm2
-; CHECK-NEXT:    movapd %xmm0, %xmm1
-; CHECK-NEXT:    cmpnlesd %xmm2, %xmm1
-; CHECK-NEXT:    andnpd %xmm0, %xmm1
-; CHECK-NEXT:    movapd %xmm1, %xmm0
+; CHECK-NEXT:    xorpd %xmm1, %xmm1
+; CHECK-NEXT:    minsd %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %c = fcmp ugt double %x, 0.000000e+00
   %d = select i1 %c, double 0.000000e+00, double %x
@@ -380,9 +377,7 @@ define double @ult_inverse_x(double %x)  {
 ; CHECK-LABEL: ult_inverse_x:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorpd %xmm1, %xmm1
-; CHECK-NEXT:    cmpnlesd %xmm0, %xmm1
-; CHECK-NEXT:    andnpd %xmm0, %xmm1
-; CHECK-NEXT:    movapd %xmm1, %xmm0
+; CHECK-NEXT:    maxsd %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %c = fcmp ult double %x, 0.000000e+00
   %d = select i1 %c, double 0.000000e+00, double %x
@@ -583,10 +578,7 @@ define double @ult_y(double %x)  {
 define double @ugt_inverse_y(double %x)  {
 ; CHECK-LABEL: ugt_inverse_y:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movapd %xmm0, %xmm1
-; CHECK-NEXT:    cmpnlesd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT:    blendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-NEXT:    movapd %xmm1, %xmm0
+; CHECK-NEXT:    minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
   %c = fcmp ugt double %x, -0.000000e+00
   %d = select i1 %c, double -0.000000e+00, double %x
@@ -596,11 +588,7 @@ define double @ugt_inverse_y(double %x)  {
 define double @ult_inverse_y(double %x)  {
 ; CHECK-LABEL: ult_inverse_y:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movapd %xmm0, %xmm1
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0]
-; CHECK-NEXT:    cmpnlesd %xmm1, %xmm0
-; CHECK-NEXT:    blendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-NEXT:    movapd %xmm1, %xmm0
+; CHECK-NEXT:    maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
   %c = fcmp ult double %x, -0.000000e+00
   %d = select i1 %c, double -0.000000e+00, double %x


        


More information about the llvm-commits mailing list