[llvm] SelectionDAG: Use qNaN constant if FCANONICALIZE not LegalOrCustom (PR #104564)

YunQiang Su via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 16 01:56:17 PDT 2024


https://github.com/wzssyqa created https://github.com/llvm/llvm-project/pull/104564

The default Action of ISD::FCANONICALIZE is Legal, but in fact the operation is not defined on most architectures, including X86.

Set the Action of ISD::FCANONICALIZE to Expand on X86, so that the generic expansion can determine whether it is LegalOrCustom and fall back to a qNaN constant when it is not. A sketch of the resulting logic follows.
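
For context, here is a rough sketch of how the two halves of the change fit together, using the same TargetLowering/SelectionDAG APIs that appear in the patch below. The variable names (MinMax, DL, VT, Flags) come from expandFMINIMUMNUM_FMAXIMUMNUM; this is an illustration, not the exact code:

    // Target side (in the target's TargetLowering constructor): declare that
    // ISD::FCANONICALIZE must be expanded for this FP type, so generic code
    // no longer sees it as Legal by default.
    setOperationAction(ISD::FCANONICALIZE, VT, Expand);

    // Generic side (in expandFMINIMUMNUM_FMAXIMUMNUM): only emit the
    // canonicalize node when the target can handle it; otherwise materialize
    // the default qNaN for the type directly.
    SDValue MinMaxQuiet;
    if (isOperationLegalOrCustom(ISD::FCANONICALIZE, VT))
      MinMaxQuiet = DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
    else
      MinMaxQuiet =
          DAG.getConstantFP(APFloat::getQNaN(VT.getFltSemantics()), DL, VT);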

From d05bb70e810d909c44700baf56349e982eb8a93f Mon Sep 17 00:00:00 2001
From: YunQiang Su <yunqiang at isrc.iscas.ac.cn>
Date: Fri, 16 Aug 2024 16:49:47 +0800
Subject: [PATCH] SelectionDAG: Use qNaN constant if FCANONICALIZE not
 LegalOrCustom

The default Action of ISD::FCANONICALIZE is Legal, but in fact the
operation is not defined on most architectures, including X86.

Set the Action of ISD::FCANONICALIZE to Expand on X86, so that the
generic expansion can determine whether it is LegalOrCustom and fall
back to a qNaN constant when it is not.
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |   12 +-
 llvm/lib/Target/X86/X86ISelLowering.cpp       |    3 +
 .../CodeGen/X86/fp-maximumnum-minimumnum.ll   | 1363 +++++++++++++++++
 3 files changed, 1376 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/fp-maximumnum-minimumnum.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 2c939967a5e1d9..7ee28d08d556c3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8622,8 +8622,16 @@ SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
   // If MinMax is NaN, let's quiet it.
   if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS) &&
       !DAG.isKnownNeverNaN(RHS)) {
-    SDValue MinMaxQuiet =
-        DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
+    SDValue MinMaxQuiet;
+    if (isOperationLegalOrCustom(ISD::FCANONICALIZE, VT)) {
+      MinMaxQuiet = DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
+    } else {
+      // MIPS pre-R5 and HPPA use a different encoding of qNaN and sNaN.
+      // ISD::FCANONICALIZE is supported by MIPS, so it takes the path above.
+      // HPPA is not supported by LLVM yet.
+      MinMaxQuiet =
+          DAG.getConstantFP(APFloat::getQNaN(VT.getFltSemantics()), DL, VT);
+    }
     MinMax =
         DAG.getSelectCC(DL, MinMax, MinMax, MinMaxQuiet, MinMax, ISD::SETUO);
   }
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 11c9a992cbdee9..e929d98e959715 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -608,6 +608,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FMAXNUM, VT, Action);
     setOperationAction(ISD::FMINIMUM, VT, Action);
     setOperationAction(ISD::FMAXIMUM, VT, Action);
+    setOperationAction(ISD::FCANONICALIZE, VT, Action);
     setOperationAction(ISD::FSIN, VT, Action);
     setOperationAction(ISD::FCOS, VT, Action);
     setOperationAction(ISD::FSINCOS, VT, Action);
@@ -668,6 +669,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::FSIN   , VT, Expand);
       setOperationAction(ISD::FCOS   , VT, Expand);
       setOperationAction(ISD::FSINCOS, VT, Expand);
+
+      setOperationAction(ISD::FCANONICALIZE, VT, Expand);
     }
 
     // Half type will be promoted by default.
diff --git a/llvm/test/CodeGen/X86/fp-maximumnum-minimumnum.ll b/llvm/test/CodeGen/X86/fp-maximumnum-minimumnum.ll
new file mode 100644
index 00000000000000..6dd7e582fae0b5
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fp-maximumnum-minimumnum.ll
@@ -0,0 +1,1363 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=x86_64 < %s | FileCheck %s --check-prefix=X64
+; RUN: llc --mtriple=x86_64 --mattr=+avx < %s | FileCheck %s --check-prefix=X64AVX
+; RUN: llc --mtriple=x86_64 --mattr=+avx512fp16 < %s | FileCheck %s --check-prefix=X64AVX512FP16
+
+declare float @llvm.maximumnum.f32(float, float)
+declare double @llvm.maximumnum.f64(double, double)
+declare float @llvm.minimumnum.f32(float, float)
+declare double @llvm.minimumnum.f64(double, double)
+
+define float @maximumnum_float(float %x, float %y) {
+; X64-LABEL: maximumnum_float:
+; X64:       # %bb.0:
+; X64-NEXT:    movaps %xmm0, %xmm2
+; X64-NEXT:    cmpunordss %xmm0, %xmm2
+; X64-NEXT:    movaps %xmm2, %xmm3
+; X64-NEXT:    andps %xmm1, %xmm3
+; X64-NEXT:    andnps %xmm0, %xmm2
+; X64-NEXT:    orps %xmm3, %xmm2
+; X64-NEXT:    movaps %xmm1, %xmm3
+; X64-NEXT:    cmpunordss %xmm1, %xmm3
+; X64-NEXT:    movaps %xmm3, %xmm0
+; X64-NEXT:    andps %xmm2, %xmm0
+; X64-NEXT:    andnps %xmm1, %xmm3
+; X64-NEXT:    orps %xmm0, %xmm3
+; X64-NEXT:    movaps %xmm3, %xmm0
+; X64-NEXT:    cmpltss %xmm2, %xmm0
+; X64-NEXT:    movaps %xmm0, %xmm1
+; X64-NEXT:    andps %xmm2, %xmm1
+; X64-NEXT:    andnps %xmm3, %xmm0
+; X64-NEXT:    orps %xmm1, %xmm0
+; X64-NEXT:    movaps %xmm0, %xmm1
+; X64-NEXT:    cmpunordss %xmm0, %xmm1
+; X64-NEXT:    movss {{.*#+}} xmm4 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64-NEXT:    andps %xmm1, %xmm4
+; X64-NEXT:    andnps %xmm0, %xmm1
+; X64-NEXT:    orps %xmm1, %xmm4
+; X64-NEXT:    xorps %xmm1, %xmm1
+; X64-NEXT:    cmpeqss %xmm4, %xmm1
+; X64-NEXT:    movd %xmm2, %eax
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    je .LBB0_2
+; X64-NEXT:  # %bb.1:
+; X64-NEXT:    movaps %xmm4, %xmm2
+; X64-NEXT:  .LBB0_2:
+; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    andnps %xmm4, %xmm0
+; X64-NEXT:    movd %xmm3, %eax
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    je .LBB0_4
+; X64-NEXT:  # %bb.3:
+; X64-NEXT:    movaps %xmm2, %xmm3
+; X64-NEXT:  .LBB0_4:
+; X64-NEXT:    andps %xmm3, %xmm1
+; X64-NEXT:    orps %xmm1, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: maximumnum_float:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm2
+; X64AVX-NEXT:    vcmpunordss %xmm1, %xmm1, %xmm0
+; X64AVX-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; X64AVX-NEXT:    vcmpltss %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT:    vblendvps %xmm1, %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT:    vcmpunordss %xmm1, %xmm1, %xmm3
+; X64AVX-NEXT:    vblendvps %xmm3, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; X64AVX-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X64AVX-NEXT:    vmovd %xmm2, %eax
+; X64AVX-NEXT:    testl %eax, %eax
+; X64AVX-NEXT:    je .LBB0_2
+; X64AVX-NEXT:  # %bb.1:
+; X64AVX-NEXT:    vmovaps %xmm1, %xmm2
+; X64AVX-NEXT:  .LBB0_2:
+; X64AVX-NEXT:    vcmpeqss %xmm3, %xmm1, %xmm3
+; X64AVX-NEXT:    vmovd %xmm0, %eax
+; X64AVX-NEXT:    testl %eax, %eax
+; X64AVX-NEXT:    je .LBB0_4
+; X64AVX-NEXT:  # %bb.3:
+; X64AVX-NEXT:    vmovaps %xmm2, %xmm0
+; X64AVX-NEXT:  .LBB0_4:
+; X64AVX-NEXT:    vblendvps %xmm3, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: maximumnum_float:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vcmpunordss %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordss %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpltss %xmm0, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovaps %xmm1, %xmm2
+; X64AVX512FP16-NEXT:    vmovss %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordss %xmm2, %xmm2, %k1
+; X64AVX512FP16-NEXT:    vmovss {{.*#+}} xmm2 {%k1} = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64AVX512FP16-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X64AVX512FP16-NEXT:    vcmpeqss %xmm3, %xmm2, %k1
+; X64AVX512FP16-NEXT:    vmovd %xmm0, %eax
+; X64AVX512FP16-NEXT:    testl %eax, %eax
+; X64AVX512FP16-NEXT:    sete %al
+; X64AVX512FP16-NEXT:    kmovd %eax, %k2
+; X64AVX512FP16-NEXT:    vmovaps %xmm2, %xmm3
+; X64AVX512FP16-NEXT:    vmovss %xmm0, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT:    vmovd %xmm1, %eax
+; X64AVX512FP16-NEXT:    testl %eax, %eax
+; X64AVX512FP16-NEXT:    sete %al
+; X64AVX512FP16-NEXT:    kmovd %eax, %k2
+; X64AVX512FP16-NEXT:    vmovss %xmm1, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT:    vmovss %xmm3, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vmovaps %xmm2, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call float @llvm.maximumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @maximumnum_float_nsz(float %x, float %y) {
+; X64-LABEL: maximumnum_float_nsz:
+; X64:       # %bb.0:
+; X64-NEXT:    movaps %xmm0, %xmm2
+; X64-NEXT:    cmpunordss %xmm0, %xmm2
+; X64-NEXT:    movaps %xmm2, %xmm3
+; X64-NEXT:    andps %xmm1, %xmm3
+; X64-NEXT:    andnps %xmm0, %xmm2
+; X64-NEXT:    orps %xmm3, %xmm2
+; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    cmpunordss %xmm1, %xmm0
+; X64-NEXT:    movaps %xmm0, %xmm3
+; X64-NEXT:    andps %xmm2, %xmm3
+; X64-NEXT:    andnps %xmm1, %xmm0
+; X64-NEXT:    orps %xmm3, %xmm0
+; X64-NEXT:    movaps %xmm0, %xmm1
+; X64-NEXT:    cmpltss %xmm2, %xmm1
+; X64-NEXT:    andps %xmm1, %xmm2
+; X64-NEXT:    andnps %xmm0, %xmm1
+; X64-NEXT:    orps %xmm2, %xmm1
+; X64-NEXT:    movaps %xmm1, %xmm2
+; X64-NEXT:    cmpunordss %xmm1, %xmm2
+; X64-NEXT:    movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64-NEXT:    andps %xmm2, %xmm0
+; X64-NEXT:    andnps %xmm1, %xmm2
+; X64-NEXT:    orps %xmm2, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: maximumnum_float_nsz:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
+; X64AVX-NEXT:    vcmpunordss %xmm1, %xmm1, %xmm2
+; X64AVX-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm1
+; X64AVX-NEXT:    vcmpltss %xmm0, %xmm1, %xmm2
+; X64AVX-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm1
+; X64AVX-NEXT:    vblendvps %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: maximumnum_float_nsz:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vcmpunordss %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordss %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpltss %xmm0, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordss %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovss {{.*#+}} xmm1 {%k1} = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64AVX512FP16-NEXT:    vmovaps %xmm1, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call nsz float @llvm.maximumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @maximumnum_float_nnan(float %x, float %y) {
+; X64-LABEL: maximumnum_float_nnan:
+; X64:       # %bb.0:
+; X64-NEXT:    movd %xmm0, %eax
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    js .LBB2_1
+; X64-NEXT:  # %bb.2:
+; X64-NEXT:    movdqa %xmm0, %xmm2
+; X64-NEXT:    jmp .LBB2_3
+; X64-NEXT:  .LBB2_1:
+; X64-NEXT:    movdqa %xmm1, %xmm2
+; X64-NEXT:    movdqa %xmm0, %xmm1
+; X64-NEXT:  .LBB2_3:
+; X64-NEXT:    maxss %xmm2, %xmm1
+; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: maximumnum_float_nnan:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vmovd %xmm0, %eax
+; X64AVX-NEXT:    testl %eax, %eax
+; X64AVX-NEXT:    js .LBB2_1
+; X64AVX-NEXT:  # %bb.2:
+; X64AVX-NEXT:    vmaxss %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT:    retq
+; X64AVX-NEXT:  .LBB2_1:
+; X64AVX-NEXT:    vmovaps %xmm1, %xmm2
+; X64AVX-NEXT:    vmaxss %xmm2, %xmm0, %xmm0
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: maximumnum_float_nnan:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vfpclassss $3, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovaps %xmm1, %xmm2
+; X64AVX512FP16-NEXT:    vmovss %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vmaxss %xmm2, %xmm0, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call nnan float @llvm.maximumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+
+define double @maximumnum_double(double %x, double %y) {
+; X64-LABEL: maximumnum_double:
+; X64:       # %bb.0:
+; X64-NEXT:    movapd %xmm0, %xmm2
+; X64-NEXT:    cmpunordsd %xmm0, %xmm2
+; X64-NEXT:    movapd %xmm2, %xmm3
+; X64-NEXT:    andpd %xmm1, %xmm3
+; X64-NEXT:    andnpd %xmm0, %xmm2
+; X64-NEXT:    orpd %xmm3, %xmm2
+; X64-NEXT:    movapd %xmm1, %xmm3
+; X64-NEXT:    cmpunordsd %xmm1, %xmm3
+; X64-NEXT:    movapd %xmm3, %xmm0
+; X64-NEXT:    andpd %xmm2, %xmm0
+; X64-NEXT:    andnpd %xmm1, %xmm3
+; X64-NEXT:    orpd %xmm0, %xmm3
+; X64-NEXT:    movapd %xmm3, %xmm0
+; X64-NEXT:    cmpltsd %xmm2, %xmm0
+; X64-NEXT:    movapd %xmm0, %xmm1
+; X64-NEXT:    andpd %xmm2, %xmm1
+; X64-NEXT:    andnpd %xmm3, %xmm0
+; X64-NEXT:    orpd %xmm1, %xmm0
+; X64-NEXT:    movapd %xmm0, %xmm1
+; X64-NEXT:    cmpunordsd %xmm0, %xmm1
+; X64-NEXT:    movsd {{.*#+}} xmm4 = [NaN,0.0E+0]
+; X64-NEXT:    andpd %xmm1, %xmm4
+; X64-NEXT:    andnpd %xmm0, %xmm1
+; X64-NEXT:    orpd %xmm1, %xmm4
+; X64-NEXT:    xorpd %xmm1, %xmm1
+; X64-NEXT:    cmpeqsd %xmm4, %xmm1
+; X64-NEXT:    movq %xmm2, %rax
+; X64-NEXT:    testq %rax, %rax
+; X64-NEXT:    je .LBB3_2
+; X64-NEXT:  # %bb.1:
+; X64-NEXT:    movapd %xmm4, %xmm2
+; X64-NEXT:  .LBB3_2:
+; X64-NEXT:    movapd %xmm1, %xmm0
+; X64-NEXT:    andnpd %xmm4, %xmm0
+; X64-NEXT:    movq %xmm3, %rax
+; X64-NEXT:    testq %rax, %rax
+; X64-NEXT:    je .LBB3_4
+; X64-NEXT:  # %bb.3:
+; X64-NEXT:    movapd %xmm2, %xmm3
+; X64-NEXT:  .LBB3_4:
+; X64-NEXT:    andpd %xmm3, %xmm1
+; X64-NEXT:    orpd %xmm1, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: maximumnum_double:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm2
+; X64AVX-NEXT:    vcmpunordsd %xmm1, %xmm1, %xmm0
+; X64AVX-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
+; X64AVX-NEXT:    vcmpltsd %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT:    vblendvpd %xmm1, %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT:    vcmpunordsd %xmm1, %xmm1, %xmm3
+; X64AVX-NEXT:    vblendvpd %xmm3, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; X64AVX-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
+; X64AVX-NEXT:    vmovq %xmm2, %rax
+; X64AVX-NEXT:    testq %rax, %rax
+; X64AVX-NEXT:    je .LBB3_2
+; X64AVX-NEXT:  # %bb.1:
+; X64AVX-NEXT:    vmovapd %xmm1, %xmm2
+; X64AVX-NEXT:  .LBB3_2:
+; X64AVX-NEXT:    vcmpeqsd %xmm3, %xmm1, %xmm3
+; X64AVX-NEXT:    vmovq %xmm0, %rax
+; X64AVX-NEXT:    testq %rax, %rax
+; X64AVX-NEXT:    je .LBB3_4
+; X64AVX-NEXT:  # %bb.3:
+; X64AVX-NEXT:    vmovapd %xmm2, %xmm0
+; X64AVX-NEXT:  .LBB3_4:
+; X64AVX-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: maximumnum_double:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vcmpunordsd %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordsd %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovsd %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpltsd %xmm0, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovapd %xmm1, %xmm2
+; X64AVX512FP16-NEXT:    vmovsd %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordsd %xmm2, %xmm2, %k1
+; X64AVX512FP16-NEXT:    vmovsd {{.*#+}} xmm2 {%k1} = [NaN,0.0E+0]
+; X64AVX512FP16-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
+; X64AVX512FP16-NEXT:    vcmpeqsd %xmm3, %xmm2, %k1
+; X64AVX512FP16-NEXT:    vmovq %xmm0, %rax
+; X64AVX512FP16-NEXT:    testq %rax, %rax
+; X64AVX512FP16-NEXT:    sete %al
+; X64AVX512FP16-NEXT:    kmovd %eax, %k2
+; X64AVX512FP16-NEXT:    vmovapd %xmm2, %xmm3
+; X64AVX512FP16-NEXT:    vmovsd %xmm0, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT:    vmovq %xmm1, %rax
+; X64AVX512FP16-NEXT:    testq %rax, %rax
+; X64AVX512FP16-NEXT:    sete %al
+; X64AVX512FP16-NEXT:    kmovd %eax, %k2
+; X64AVX512FP16-NEXT:    vmovsd %xmm1, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT:    vmovsd %xmm3, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vmovapd %xmm2, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call double @llvm.maximumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @maximumnum_double_nsz(double %x, double %y) {
+; X64-LABEL: maximumnum_double_nsz:
+; X64:       # %bb.0:
+; X64-NEXT:    movapd %xmm0, %xmm2
+; X64-NEXT:    cmpunordsd %xmm0, %xmm2
+; X64-NEXT:    movapd %xmm2, %xmm3
+; X64-NEXT:    andpd %xmm1, %xmm3
+; X64-NEXT:    andnpd %xmm0, %xmm2
+; X64-NEXT:    orpd %xmm3, %xmm2
+; X64-NEXT:    movapd %xmm1, %xmm0
+; X64-NEXT:    cmpunordsd %xmm1, %xmm0
+; X64-NEXT:    movapd %xmm0, %xmm3
+; X64-NEXT:    andpd %xmm2, %xmm3
+; X64-NEXT:    andnpd %xmm1, %xmm0
+; X64-NEXT:    orpd %xmm3, %xmm0
+; X64-NEXT:    movapd %xmm0, %xmm1
+; X64-NEXT:    cmpltsd %xmm2, %xmm1
+; X64-NEXT:    andpd %xmm1, %xmm2
+; X64-NEXT:    andnpd %xmm0, %xmm1
+; X64-NEXT:    orpd %xmm2, %xmm1
+; X64-NEXT:    movapd %xmm1, %xmm2
+; X64-NEXT:    cmpunordsd %xmm1, %xmm2
+; X64-NEXT:    movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
+; X64-NEXT:    andpd %xmm2, %xmm0
+; X64-NEXT:    andnpd %xmm1, %xmm2
+; X64-NEXT:    orpd %xmm2, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: maximumnum_double_nsz:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; X64AVX-NEXT:    vcmpunordsd %xmm1, %xmm1, %xmm2
+; X64AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
+; X64AVX-NEXT:    vcmpltsd %xmm0, %xmm1, %xmm2
+; X64AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm1
+; X64AVX-NEXT:    vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: maximumnum_double_nsz:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vcmpunordsd %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordsd %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovsd %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpltsd %xmm0, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovsd %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordsd %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovsd {{.*#+}} xmm1 {%k1} = [NaN,0.0E+0]
+; X64AVX512FP16-NEXT:    vmovapd %xmm1, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call nsz double @llvm.maximumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @maximumnum_double_nnan(double %x, double %y) {
+; X64-LABEL: maximumnum_double_nnan:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %xmm0, %rax
+; X64-NEXT:    testq %rax, %rax
+; X64-NEXT:    js .LBB5_1
+; X64-NEXT:  # %bb.2:
+; X64-NEXT:    movdqa %xmm0, %xmm2
+; X64-NEXT:    jmp .LBB5_3
+; X64-NEXT:  .LBB5_1:
+; X64-NEXT:    movdqa %xmm1, %xmm2
+; X64-NEXT:    movdqa %xmm0, %xmm1
+; X64-NEXT:  .LBB5_3:
+; X64-NEXT:    maxsd %xmm2, %xmm1
+; X64-NEXT:    movapd %xmm1, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: maximumnum_double_nnan:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vmovq %xmm0, %rax
+; X64AVX-NEXT:    testq %rax, %rax
+; X64AVX-NEXT:    js .LBB5_1
+; X64AVX-NEXT:  # %bb.2:
+; X64AVX-NEXT:    vmaxsd %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT:    retq
+; X64AVX-NEXT:  .LBB5_1:
+; X64AVX-NEXT:    vmovapd %xmm1, %xmm2
+; X64AVX-NEXT:    vmaxsd %xmm2, %xmm0, %xmm0
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: maximumnum_double_nnan:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vfpclasssd $3, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovapd %xmm1, %xmm2
+; X64AVX512FP16-NEXT:    vmovsd %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vmaxsd %xmm2, %xmm0, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call nnan double @llvm.maximumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define float @minimumnum_float(float %x, float %y) {
+; X64-LABEL: minimumnum_float:
+; X64:       # %bb.0:
+; X64-NEXT:    movaps %xmm0, %xmm2
+; X64-NEXT:    cmpunordss %xmm0, %xmm2
+; X64-NEXT:    movaps %xmm2, %xmm3
+; X64-NEXT:    andps %xmm1, %xmm3
+; X64-NEXT:    andnps %xmm0, %xmm2
+; X64-NEXT:    orps %xmm3, %xmm2
+; X64-NEXT:    movaps %xmm1, %xmm3
+; X64-NEXT:    cmpunordss %xmm1, %xmm3
+; X64-NEXT:    movaps %xmm3, %xmm0
+; X64-NEXT:    andps %xmm2, %xmm0
+; X64-NEXT:    andnps %xmm1, %xmm3
+; X64-NEXT:    orps %xmm0, %xmm3
+; X64-NEXT:    movaps %xmm2, %xmm0
+; X64-NEXT:    cmpltss %xmm3, %xmm0
+; X64-NEXT:    movaps %xmm0, %xmm1
+; X64-NEXT:    andps %xmm2, %xmm1
+; X64-NEXT:    andnps %xmm3, %xmm0
+; X64-NEXT:    orps %xmm1, %xmm0
+; X64-NEXT:    movaps %xmm0, %xmm1
+; X64-NEXT:    cmpunordss %xmm0, %xmm1
+; X64-NEXT:    movss {{.*#+}} xmm4 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64-NEXT:    andps %xmm1, %xmm4
+; X64-NEXT:    andnps %xmm0, %xmm1
+; X64-NEXT:    orps %xmm1, %xmm4
+; X64-NEXT:    xorps %xmm1, %xmm1
+; X64-NEXT:    cmpeqss %xmm4, %xmm1
+; X64-NEXT:    movd %xmm2, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    jo .LBB6_2
+; X64-NEXT:  # %bb.1:
+; X64-NEXT:    movaps %xmm4, %xmm2
+; X64-NEXT:  .LBB6_2:
+; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    andnps %xmm4, %xmm0
+; X64-NEXT:    movd %xmm3, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    jo .LBB6_4
+; X64-NEXT:  # %bb.3:
+; X64-NEXT:    movaps %xmm2, %xmm3
+; X64-NEXT:  .LBB6_4:
+; X64-NEXT:    andps %xmm3, %xmm1
+; X64-NEXT:    orps %xmm1, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: minimumnum_float:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm2
+; X64AVX-NEXT:    vcmpunordss %xmm1, %xmm1, %xmm0
+; X64AVX-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; X64AVX-NEXT:    vcmpltss %xmm0, %xmm2, %xmm1
+; X64AVX-NEXT:    vblendvps %xmm1, %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT:    vcmpunordss %xmm1, %xmm1, %xmm3
+; X64AVX-NEXT:    vblendvps %xmm3, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; X64AVX-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X64AVX-NEXT:    vmovd %xmm2, %eax
+; X64AVX-NEXT:    negl %eax
+; X64AVX-NEXT:    jo .LBB6_2
+; X64AVX-NEXT:  # %bb.1:
+; X64AVX-NEXT:    vmovaps %xmm1, %xmm2
+; X64AVX-NEXT:  .LBB6_2:
+; X64AVX-NEXT:    vcmpeqss %xmm3, %xmm1, %xmm3
+; X64AVX-NEXT:    vmovd %xmm0, %eax
+; X64AVX-NEXT:    negl %eax
+; X64AVX-NEXT:    jo .LBB6_4
+; X64AVX-NEXT:  # %bb.3:
+; X64AVX-NEXT:    vmovaps %xmm2, %xmm0
+; X64AVX-NEXT:  .LBB6_4:
+; X64AVX-NEXT:    vblendvps %xmm3, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: minimumnum_float:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vcmpunordss %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordss %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpltss %xmm1, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovaps %xmm1, %xmm2
+; X64AVX512FP16-NEXT:    vmovss %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordss %xmm2, %xmm2, %k1
+; X64AVX512FP16-NEXT:    vmovss {{.*#+}} xmm2 {%k1} = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64AVX512FP16-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X64AVX512FP16-NEXT:    vcmpeqss %xmm3, %xmm2, %k1
+; X64AVX512FP16-NEXT:    vmovd %xmm0, %eax
+; X64AVX512FP16-NEXT:    negl %eax
+; X64AVX512FP16-NEXT:    seto %al
+; X64AVX512FP16-NEXT:    kmovd %eax, %k2
+; X64AVX512FP16-NEXT:    vmovaps %xmm2, %xmm3
+; X64AVX512FP16-NEXT:    vmovss %xmm0, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT:    vmovd %xmm1, %eax
+; X64AVX512FP16-NEXT:    negl %eax
+; X64AVX512FP16-NEXT:    seto %al
+; X64AVX512FP16-NEXT:    kmovd %eax, %k2
+; X64AVX512FP16-NEXT:    vmovss %xmm1, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT:    vmovss %xmm3, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vmovaps %xmm2, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call float @llvm.minimumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @minimumnum_float_nsz(float %x, float %y) {
+; X64-LABEL: minimumnum_float_nsz:
+; X64:       # %bb.0:
+; X64-NEXT:    movaps %xmm0, %xmm2
+; X64-NEXT:    cmpunordss %xmm0, %xmm2
+; X64-NEXT:    movaps %xmm2, %xmm3
+; X64-NEXT:    andps %xmm1, %xmm3
+; X64-NEXT:    andnps %xmm0, %xmm2
+; X64-NEXT:    orps %xmm3, %xmm2
+; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    cmpunordss %xmm1, %xmm0
+; X64-NEXT:    movaps %xmm0, %xmm3
+; X64-NEXT:    andps %xmm2, %xmm3
+; X64-NEXT:    andnps %xmm1, %xmm0
+; X64-NEXT:    orps %xmm3, %xmm0
+; X64-NEXT:    movaps %xmm2, %xmm1
+; X64-NEXT:    cmpltss %xmm0, %xmm1
+; X64-NEXT:    andps %xmm1, %xmm2
+; X64-NEXT:    andnps %xmm0, %xmm1
+; X64-NEXT:    orps %xmm2, %xmm1
+; X64-NEXT:    movaps %xmm1, %xmm2
+; X64-NEXT:    cmpunordss %xmm1, %xmm2
+; X64-NEXT:    movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64-NEXT:    andps %xmm2, %xmm0
+; X64-NEXT:    andnps %xmm1, %xmm2
+; X64-NEXT:    orps %xmm2, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: minimumnum_float_nsz:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
+; X64AVX-NEXT:    vcmpunordss %xmm1, %xmm1, %xmm2
+; X64AVX-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm1
+; X64AVX-NEXT:    vcmpltss %xmm1, %xmm0, %xmm2
+; X64AVX-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm1
+; X64AVX-NEXT:    vblendvps %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: minimumnum_float_nsz:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vcmpunordss %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordss %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpltss %xmm1, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordss %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovss {{.*#+}} xmm1 {%k1} = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64AVX512FP16-NEXT:    vmovaps %xmm1, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call nsz float @llvm.minimumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @minimumnum_float_nnan(float %x, float %y) {
+; X64-LABEL: minimumnum_float_nnan:
+; X64:       # %bb.0:
+; X64-NEXT:    movd %xmm0, %eax
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    js .LBB8_1
+; X64-NEXT:  # %bb.2:
+; X64-NEXT:    minss %xmm1, %xmm0
+; X64-NEXT:    retq
+; X64-NEXT:  .LBB8_1:
+; X64-NEXT:    movdqa %xmm0, %xmm2
+; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    minss %xmm2, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: minimumnum_float_nnan:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vmovd %xmm0, %eax
+; X64AVX-NEXT:    testl %eax, %eax
+; X64AVX-NEXT:    js .LBB8_1
+; X64AVX-NEXT:  # %bb.2:
+; X64AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
+; X64AVX-NEXT:    retq
+; X64AVX-NEXT:  .LBB8_1:
+; X64AVX-NEXT:    vmovdqa %xmm0, %xmm2
+; X64AVX-NEXT:    vminss %xmm2, %xmm1, %xmm0
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: minimumnum_float_nnan:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vfpclassss $5, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovaps %xmm1, %xmm2
+; X64AVX512FP16-NEXT:    vmovss %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vminss %xmm2, %xmm0, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call nnan float @llvm.minimumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define double @minimumnum_double(double %x, double %y) {
+; X64-LABEL: minimumnum_double:
+; X64:       # %bb.0:
+; X64-NEXT:    movapd %xmm0, %xmm2
+; X64-NEXT:    cmpunordsd %xmm0, %xmm2
+; X64-NEXT:    movapd %xmm2, %xmm3
+; X64-NEXT:    andpd %xmm1, %xmm3
+; X64-NEXT:    andnpd %xmm0, %xmm2
+; X64-NEXT:    orpd %xmm3, %xmm2
+; X64-NEXT:    movapd %xmm1, %xmm3
+; X64-NEXT:    cmpunordsd %xmm1, %xmm3
+; X64-NEXT:    movapd %xmm3, %xmm0
+; X64-NEXT:    andpd %xmm2, %xmm0
+; X64-NEXT:    andnpd %xmm1, %xmm3
+; X64-NEXT:    orpd %xmm0, %xmm3
+; X64-NEXT:    movapd %xmm2, %xmm0
+; X64-NEXT:    cmpltsd %xmm3, %xmm0
+; X64-NEXT:    movapd %xmm0, %xmm1
+; X64-NEXT:    andpd %xmm2, %xmm1
+; X64-NEXT:    andnpd %xmm3, %xmm0
+; X64-NEXT:    orpd %xmm1, %xmm0
+; X64-NEXT:    movapd %xmm0, %xmm1
+; X64-NEXT:    cmpunordsd %xmm0, %xmm1
+; X64-NEXT:    movsd {{.*#+}} xmm4 = [NaN,0.0E+0]
+; X64-NEXT:    andpd %xmm1, %xmm4
+; X64-NEXT:    andnpd %xmm0, %xmm1
+; X64-NEXT:    orpd %xmm1, %xmm4
+; X64-NEXT:    xorpd %xmm1, %xmm1
+; X64-NEXT:    cmpeqsd %xmm4, %xmm1
+; X64-NEXT:    movq %xmm2, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    jo .LBB9_2
+; X64-NEXT:  # %bb.1:
+; X64-NEXT:    movapd %xmm4, %xmm2
+; X64-NEXT:  .LBB9_2:
+; X64-NEXT:    movapd %xmm1, %xmm0
+; X64-NEXT:    andnpd %xmm4, %xmm0
+; X64-NEXT:    movq %xmm3, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    jo .LBB9_4
+; X64-NEXT:  # %bb.3:
+; X64-NEXT:    movapd %xmm2, %xmm3
+; X64-NEXT:  .LBB9_4:
+; X64-NEXT:    andpd %xmm3, %xmm1
+; X64-NEXT:    orpd %xmm1, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: minimumnum_double:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm2
+; X64AVX-NEXT:    vcmpunordsd %xmm1, %xmm1, %xmm0
+; X64AVX-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
+; X64AVX-NEXT:    vcmpltsd %xmm0, %xmm2, %xmm1
+; X64AVX-NEXT:    vblendvpd %xmm1, %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT:    vcmpunordsd %xmm1, %xmm1, %xmm3
+; X64AVX-NEXT:    vblendvpd %xmm3, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; X64AVX-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
+; X64AVX-NEXT:    vmovq %xmm2, %rax
+; X64AVX-NEXT:    negq %rax
+; X64AVX-NEXT:    jo .LBB9_2
+; X64AVX-NEXT:  # %bb.1:
+; X64AVX-NEXT:    vmovapd %xmm1, %xmm2
+; X64AVX-NEXT:  .LBB9_2:
+; X64AVX-NEXT:    vcmpeqsd %xmm3, %xmm1, %xmm3
+; X64AVX-NEXT:    vmovq %xmm0, %rax
+; X64AVX-NEXT:    negq %rax
+; X64AVX-NEXT:    jo .LBB9_4
+; X64AVX-NEXT:  # %bb.3:
+; X64AVX-NEXT:    vmovapd %xmm2, %xmm0
+; X64AVX-NEXT:  .LBB9_4:
+; X64AVX-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: minimumnum_double:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vcmpunordsd %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordsd %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovsd %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpltsd %xmm1, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovapd %xmm1, %xmm2
+; X64AVX512FP16-NEXT:    vmovsd %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordsd %xmm2, %xmm2, %k1
+; X64AVX512FP16-NEXT:    vmovsd {{.*#+}} xmm2 {%k1} = [NaN,0.0E+0]
+; X64AVX512FP16-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
+; X64AVX512FP16-NEXT:    vcmpeqsd %xmm3, %xmm2, %k1
+; X64AVX512FP16-NEXT:    vmovq %xmm0, %rax
+; X64AVX512FP16-NEXT:    negq %rax
+; X64AVX512FP16-NEXT:    seto %al
+; X64AVX512FP16-NEXT:    kmovd %eax, %k2
+; X64AVX512FP16-NEXT:    vmovapd %xmm2, %xmm3
+; X64AVX512FP16-NEXT:    vmovsd %xmm0, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT:    vmovq %xmm1, %rax
+; X64AVX512FP16-NEXT:    negq %rax
+; X64AVX512FP16-NEXT:    seto %al
+; X64AVX512FP16-NEXT:    kmovd %eax, %k2
+; X64AVX512FP16-NEXT:    vmovsd %xmm1, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT:    vmovsd %xmm3, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vmovapd %xmm2, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call double @llvm.minimumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @minimumnum_double_nsz(double %x, double %y) {
+; X64-LABEL: minimumnum_double_nsz:
+; X64:       # %bb.0:
+; X64-NEXT:    movapd %xmm0, %xmm2
+; X64-NEXT:    cmpunordsd %xmm0, %xmm2
+; X64-NEXT:    movapd %xmm2, %xmm3
+; X64-NEXT:    andpd %xmm1, %xmm3
+; X64-NEXT:    andnpd %xmm0, %xmm2
+; X64-NEXT:    orpd %xmm3, %xmm2
+; X64-NEXT:    movapd %xmm1, %xmm0
+; X64-NEXT:    cmpunordsd %xmm1, %xmm0
+; X64-NEXT:    movapd %xmm0, %xmm3
+; X64-NEXT:    andpd %xmm2, %xmm3
+; X64-NEXT:    andnpd %xmm1, %xmm0
+; X64-NEXT:    orpd %xmm3, %xmm0
+; X64-NEXT:    movapd %xmm2, %xmm1
+; X64-NEXT:    cmpltsd %xmm0, %xmm1
+; X64-NEXT:    andpd %xmm1, %xmm2
+; X64-NEXT:    andnpd %xmm0, %xmm1
+; X64-NEXT:    orpd %xmm2, %xmm1
+; X64-NEXT:    movapd %xmm1, %xmm2
+; X64-NEXT:    cmpunordsd %xmm1, %xmm2
+; X64-NEXT:    movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
+; X64-NEXT:    andpd %xmm2, %xmm0
+; X64-NEXT:    andnpd %xmm1, %xmm2
+; X64-NEXT:    orpd %xmm2, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: minimumnum_double_nsz:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; X64AVX-NEXT:    vcmpunordsd %xmm1, %xmm1, %xmm2
+; X64AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
+; X64AVX-NEXT:    vcmpltsd %xmm1, %xmm0, %xmm2
+; X64AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm1
+; X64AVX-NEXT:    vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: minimumnum_double_nsz:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vcmpunordsd %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordsd %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovsd %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpltsd %xmm1, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovsd %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordsd %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovsd {{.*#+}} xmm1 {%k1} = [NaN,0.0E+0]
+; X64AVX512FP16-NEXT:    vmovapd %xmm1, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call nsz double @llvm.minimumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @minimumnum_double_nnan(double %x, double %y) {
+; X64-LABEL: minimumnum_double_nnan:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %xmm0, %rax
+; X64-NEXT:    testq %rax, %rax
+; X64-NEXT:    js .LBB11_1
+; X64-NEXT:  # %bb.2:
+; X64-NEXT:    minsd %xmm1, %xmm0
+; X64-NEXT:    retq
+; X64-NEXT:  .LBB11_1:
+; X64-NEXT:    movdqa %xmm0, %xmm2
+; X64-NEXT:    movapd %xmm1, %xmm0
+; X64-NEXT:    minsd %xmm2, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: minimumnum_double_nnan:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vmovq %xmm0, %rax
+; X64AVX-NEXT:    testq %rax, %rax
+; X64AVX-NEXT:    js .LBB11_1
+; X64AVX-NEXT:  # %bb.2:
+; X64AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm0
+; X64AVX-NEXT:    retq
+; X64AVX-NEXT:  .LBB11_1:
+; X64AVX-NEXT:    vmovdqa %xmm0, %xmm2
+; X64AVX-NEXT:    vminsd %xmm2, %xmm1, %xmm0
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: minimumnum_double_nnan:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vfpclasssd $5, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovapd %xmm1, %xmm2
+; X64AVX512FP16-NEXT:    vmovsd %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vminsd %xmm2, %xmm0, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call nnan double @llvm.minimumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define half @minimumnum_half(half %x, half %y) {
+; X64-LABEL: minimumnum_half:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 24
+; X64-NEXT:    subq $56, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 80
+; X64-NEXT:    .cfi_offset %rbx, -24
+; X64-NEXT:    .cfi_offset %rbp, -16
+; X64-NEXT:    movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    ucomiss %xmm0, %xmm0
+; X64-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    jp .LBB12_2
+; X64-NEXT:  # %bb.1:
+; X64-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:  .LBB12_2:
+; X64-NEXT:    callq __truncsfhf2 at PLT
+; X64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    ucomiss %xmm0, %xmm0
+; X64-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    jp .LBB12_4
+; X64-NEXT:  # %bb.3:
+; X64-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:  .LBB12_4:
+; X64-NEXT:    callq __truncsfhf2 at PLT
+; X64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    jb .LBB12_6
+; X64-NEXT:  # %bb.5:
+; X64-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:  .LBB12_6:
+; X64-NEXT:    callq __truncsfhf2 at PLT
+; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    ucomiss %xmm0, %xmm0
+; X64-NEXT:    jp .LBB12_7
+; X64-NEXT:  # %bb.8:
+; X64-NEXT:    movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    jmp .LBB12_9
+; X64-NEXT:  .LBB12_7:
+; X64-NEXT:    movd {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64-NEXT:  .LBB12_9:
+; X64-NEXT:    callq __truncsfhf2 at PLT
+; X64-NEXT:    pextrw $0, %xmm0, %ebx
+; X64-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-NEXT:    pextrw $0, %xmm1, %eax
+; X64-NEXT:    movzwl %ax, %ecx
+; X64-NEXT:    cmpl $32768, %ecx # imm = 0x8000
+; X64-NEXT:    cmovnel %ebx, %eax
+; X64-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-NEXT:    pextrw $0, %xmm1, %ebp
+; X64-NEXT:    movzwl %bp, %ecx
+; X64-NEXT:    cmpl $32768, %ecx # imm = 0x8000
+; X64-NEXT:    cmovnel %eax, %ebp
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    pxor %xmm1, %xmm1
+; X64-NEXT:    ucomiss %xmm1, %xmm0
+; X64-NEXT:    cmovnel %ebx, %ebp
+; X64-NEXT:    pinsrw $0, %ebp, %xmm0
+; X64-NEXT:    addq $56, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 24
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    popq %rbp
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: minimumnum_half:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    pushq %rbp
+; X64AVX-NEXT:    .cfi_def_cfa_offset 16
+; X64AVX-NEXT:    pushq %rbx
+; X64AVX-NEXT:    .cfi_def_cfa_offset 24
+; X64AVX-NEXT:    subq $56, %rsp
+; X64AVX-NEXT:    .cfi_def_cfa_offset 80
+; X64AVX-NEXT:    .cfi_offset %rbx, -24
+; X64AVX-NEXT:    .cfi_offset %rbp, -16
+; X64AVX-NEXT:    vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vucomiss %xmm0, %xmm0
+; X64AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:    jp .LBB12_2
+; X64AVX-NEXT:  # %bb.1:
+; X64AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:  .LBB12_2:
+; X64AVX-NEXT:    callq __truncsfhf2 at PLT
+; X64AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vucomiss %xmm0, %xmm0
+; X64AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:    jp .LBB12_4
+; X64AVX-NEXT:  # %bb.3:
+; X64AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:  .LBB12_4:
+; X64AVX-NEXT:    callq __truncsfhf2 at PLT
+; X64AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:    jb .LBB12_6
+; X64AVX-NEXT:  # %bb.5:
+; X64AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:  .LBB12_6:
+; X64AVX-NEXT:    callq __truncsfhf2 at PLT
+; X64AVX-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vucomiss %xmm0, %xmm0
+; X64AVX-NEXT:    jp .LBB12_7
+; X64AVX-NEXT:  # %bb.8:
+; X64AVX-NEXT:    vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:    jmp .LBB12_9
+; X64AVX-NEXT:  .LBB12_7:
+; X64AVX-NEXT:    vmovd {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64AVX-NEXT:  .LBB12_9:
+; X64AVX-NEXT:    callq __truncsfhf2 at PLT
+; X64AVX-NEXT:    vpextrw $0, %xmm0, %ebx
+; X64AVX-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64AVX-NEXT:    vpextrw $0, %xmm1, %eax
+; X64AVX-NEXT:    movzwl %ax, %ecx
+; X64AVX-NEXT:    cmpl $32768, %ecx # imm = 0x8000
+; X64AVX-NEXT:    cmovnel %ebx, %eax
+; X64AVX-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64AVX-NEXT:    vpextrw $0, %xmm1, %ebp
+; X64AVX-NEXT:    movzwl %bp, %ecx
+; X64AVX-NEXT:    cmpl $32768, %ecx # imm = 0x8000
+; X64AVX-NEXT:    cmovnel %eax, %ebp
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; X64AVX-NEXT:    vucomiss %xmm1, %xmm0
+; X64AVX-NEXT:    cmovnel %ebx, %ebp
+; X64AVX-NEXT:    vpinsrw $0, %ebp, %xmm0, %xmm0
+; X64AVX-NEXT:    addq $56, %rsp
+; X64AVX-NEXT:    .cfi_def_cfa_offset 24
+; X64AVX-NEXT:    popq %rbx
+; X64AVX-NEXT:    .cfi_def_cfa_offset 16
+; X64AVX-NEXT:    popq %rbp
+; X64AVX-NEXT:    .cfi_def_cfa_offset 8
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: minimumnum_half:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vcmpunordsh %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovsh %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordsh %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovsh %xmm0, %xmm0, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpltsh %xmm1, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovaps %xmm1, %xmm2
+; X64AVX512FP16-NEXT:    vmovsh %xmm0, %xmm0, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordsh %xmm2, %xmm2, %k1
+; X64AVX512FP16-NEXT:    vmovsh {{.*#+}} xmm3 = [NaN,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X64AVX512FP16-NEXT:    vmovsh %xmm3, %xmm0, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X64AVX512FP16-NEXT:    vcmpeqsh %xmm3, %xmm2, %k1
+; X64AVX512FP16-NEXT:    vmovw %xmm0, %eax
+; X64AVX512FP16-NEXT:    movzwl %ax, %eax
+; X64AVX512FP16-NEXT:    cmpl $32768, %eax # imm = 0x8000
+; X64AVX512FP16-NEXT:    sete %al
+; X64AVX512FP16-NEXT:    kmovd %eax, %k2
+; X64AVX512FP16-NEXT:    vmovaps %xmm2, %xmm3
+; X64AVX512FP16-NEXT:    vmovsh %xmm0, %xmm0, %xmm3 {%k2}
+; X64AVX512FP16-NEXT:    vmovw %xmm1, %eax
+; X64AVX512FP16-NEXT:    movzwl %ax, %eax
+; X64AVX512FP16-NEXT:    cmpl $32768, %eax # imm = 0x8000
+; X64AVX512FP16-NEXT:    sete %al
+; X64AVX512FP16-NEXT:    kmovd %eax, %k2
+; X64AVX512FP16-NEXT:    vmovsh %xmm1, %xmm0, %xmm3 {%k2}
+; X64AVX512FP16-NEXT:    vmovsh %xmm3, %xmm0, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vmovaps %xmm2, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call half @llvm.minimumnum.f16(half %x, half %y)
+  ret half %z
+}
+
+define half @minimumnum_half_nsz(half %x, half %y) {
+; X64-LABEL: minimumnum_half_nsz:
+; X64:       # %bb.0:
+; X64-NEXT:    subq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    ucomiss %xmm0, %xmm0
+; X64-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    jp .LBB13_2
+; X64-NEXT:  # %bb.1:
+; X64-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:  .LBB13_2:
+; X64-NEXT:    callq __truncsfhf2 at PLT
+; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    ucomiss %xmm0, %xmm0
+; X64-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    jp .LBB13_4
+; X64-NEXT:  # %bb.3:
+; X64-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:  .LBB13_4:
+; X64-NEXT:    callq __truncsfhf2 at PLT
+; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    jb .LBB13_6
+; X64-NEXT:  # %bb.5:
+; X64-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:  .LBB13_6:
+; X64-NEXT:    callq __truncsfhf2 at PLT
+; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    ucomiss %xmm0, %xmm0
+; X64-NEXT:    jp .LBB13_7
+; X64-NEXT:  # %bb.8:
+; X64-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    jmp .LBB13_9
+; X64-NEXT:  .LBB13_7:
+; X64-NEXT:    movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64-NEXT:  .LBB13_9:
+; X64-NEXT:    callq __truncsfhf2 at PLT
+; X64-NEXT:    addq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: minimumnum_half_nsz:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    subq $24, %rsp
+; X64AVX-NEXT:    .cfi_def_cfa_offset 32
+; X64AVX-NEXT:    vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vucomiss %xmm0, %xmm0
+; X64AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:    jp .LBB13_2
+; X64AVX-NEXT:  # %bb.1:
+; X64AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:  .LBB13_2:
+; X64AVX-NEXT:    callq __truncsfhf2 at PLT
+; X64AVX-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vucomiss %xmm0, %xmm0
+; X64AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:    jp .LBB13_4
+; X64AVX-NEXT:  # %bb.3:
+; X64AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:  .LBB13_4:
+; X64AVX-NEXT:    callq __truncsfhf2 at PLT
+; X64AVX-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:    jb .LBB13_6
+; X64AVX-NEXT:  # %bb.5:
+; X64AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:  .LBB13_6:
+; X64AVX-NEXT:    callq __truncsfhf2 at PLT
+; X64AVX-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vucomiss %xmm0, %xmm0
+; X64AVX-NEXT:    jp .LBB13_7
+; X64AVX-NEXT:  # %bb.8:
+; X64AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:    jmp .LBB13_9
+; X64AVX-NEXT:  .LBB13_7:
+; X64AVX-NEXT:    vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64AVX-NEXT:  .LBB13_9:
+; X64AVX-NEXT:    callq __truncsfhf2 at PLT
+; X64AVX-NEXT:    addq $24, %rsp
+; X64AVX-NEXT:    .cfi_def_cfa_offset 8
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: minimumnum_half_nsz:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vcmpunordsh %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovsh %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordsh %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovsh %xmm0, %xmm0, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpltsh %xmm1, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovsh %xmm0, %xmm0, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordsh %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovsh {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X64AVX512FP16-NEXT:    vmovsh %xmm0, %xmm0, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vmovaps %xmm1, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call nsz half @llvm.minimumnum.f16(half %x, half %y)
+  ret half %z
+}
+
+define half @minimumnum_half_nnan(half %x, half %y) {
+; X64-LABEL: minimumnum_half_nnan:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    pushq %r14
+; X64-NEXT:    .cfi_def_cfa_offset 24
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    subq $48, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 80
+; X64-NEXT:    .cfi_offset %rbx, -32
+; X64-NEXT:    .cfi_offset %r14, -24
+; X64-NEXT:    .cfi_offset %rbp, -16
+; X64-NEXT:    movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT:    pextrw $0, %xmm1, %ebx
+; X64-NEXT:    pextrw $0, %xmm0, %ebp
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64-NEXT:    movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    jb .LBB14_2
+; X64-NEXT:  # %bb.1:
+; X64-NEXT:    movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:  .LBB14_2:
+; X64-NEXT:    callq __truncsfhf2 at PLT
+; X64-NEXT:    pextrw $0, %xmm0, %r14d
+; X64-NEXT:    movzwl %bp, %eax
+; X64-NEXT:    cmpl $32768, %eax # imm = 0x8000
+; X64-NEXT:    movl %r14d, %eax
+; X64-NEXT:    cmovel %ebp, %eax
+; X64-NEXT:    movzwl %bx, %ecx
+; X64-NEXT:    cmpl $32768, %ecx # imm = 0x8000
+; X64-NEXT:    cmovnel %eax, %ebx
+; X64-NEXT:    callq __extendhfsf2 at PLT
+; X64-NEXT:    pxor %xmm1, %xmm1
+; X64-NEXT:    ucomiss %xmm1, %xmm0
+; X64-NEXT:    cmovnel %r14d, %ebx
+; X64-NEXT:    pinsrw $0, %ebx, %xmm0
+; X64-NEXT:    addq $48, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 24
+; X64-NEXT:    popq %r14
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    popq %rbp
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: minimumnum_half_nnan:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    pushq %rbp
+; X64AVX-NEXT:    .cfi_def_cfa_offset 16
+; X64AVX-NEXT:    pushq %r14
+; X64AVX-NEXT:    .cfi_def_cfa_offset 24
+; X64AVX-NEXT:    pushq %rbx
+; X64AVX-NEXT:    .cfi_def_cfa_offset 32
+; X64AVX-NEXT:    subq $48, %rsp
+; X64AVX-NEXT:    .cfi_def_cfa_offset 80
+; X64AVX-NEXT:    .cfi_offset %rbx, -32
+; X64AVX-NEXT:    .cfi_offset %r14, -24
+; X64AVX-NEXT:    .cfi_offset %rbp, -16
+; X64AVX-NEXT:    vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64AVX-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64AVX-NEXT:    vpextrw $0, %xmm1, %ebx
+; X64AVX-NEXT:    vpextrw $0, %xmm0, %ebp
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; X64AVX-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64AVX-NEXT:    vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:    jb .LBB14_2
+; X64AVX-NEXT:  # %bb.1:
+; X64AVX-NEXT:    vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT:  .LBB14_2:
+; X64AVX-NEXT:    callq __truncsfhf2 at PLT
+; X64AVX-NEXT:    vpextrw $0, %xmm0, %r14d
+; X64AVX-NEXT:    movzwl %bp, %eax
+; X64AVX-NEXT:    cmpl $32768, %eax # imm = 0x8000
+; X64AVX-NEXT:    movl %r14d, %eax
+; X64AVX-NEXT:    cmovel %ebp, %eax
+; X64AVX-NEXT:    movzwl %bx, %ecx
+; X64AVX-NEXT:    cmpl $32768, %ecx # imm = 0x8000
+; X64AVX-NEXT:    cmovnel %eax, %ebx
+; X64AVX-NEXT:    callq __extendhfsf2 at PLT
+; X64AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; X64AVX-NEXT:    vucomiss %xmm1, %xmm0
+; X64AVX-NEXT:    cmovnel %r14d, %ebx
+; X64AVX-NEXT:    vpinsrw $0, %ebx, %xmm0, %xmm0
+; X64AVX-NEXT:    addq $48, %rsp
+; X64AVX-NEXT:    .cfi_def_cfa_offset 32
+; X64AVX-NEXT:    popq %rbx
+; X64AVX-NEXT:    .cfi_def_cfa_offset 24
+; X64AVX-NEXT:    popq %r14
+; X64AVX-NEXT:    .cfi_def_cfa_offset 16
+; X64AVX-NEXT:    popq %rbp
+; X64AVX-NEXT:    .cfi_def_cfa_offset 8
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: minimumnum_half_nnan:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vfpclasssh $5, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovaps %xmm1, %xmm2
+; X64AVX512FP16-NEXT:    vmovsh %xmm0, %xmm0, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vmovsh %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vminsh %xmm2, %xmm0, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call nnan half @llvm.minimumnum.f16(half %x, half %y)
+  ret half %z
+}
+


