[llvm] SelectionDAG: Use qNaN constant if FCANONICALIZE not LegalOrCustom (PR #104564)
YunQiang Su via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 16 01:56:17 PDT 2024
https://github.com/wzssyqa created https://github.com/llvm/llvm-project/pull/104564
The default Action of ISD::FCANONICALIZE is Legal, but in fact most architectures, X86 included, do not implement it.
Let's set the Action of ISD::FCANONICALIZE to Expand on X86, so that we can correctly determine whether it is LegalOrCustom and, when it is not, have expandFMINIMUMNUM_FMAXIMUMNUM fall back to a qNaN constant.
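
For reference, the quieting logic added to expandFMINIMUMNUM_FMAXIMUMNUM boils down to the standalone sketch below. It is illustrative only: the helper name quietIfNaN and the explicit TLI parameter are mine, since the real code is a TargetLowering member and calls isOperationLegalOrCustom directly.

#include "llvm/ADT/APFloat.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

// Quiet a possibly-NaN min/max result. Prefer FCANONICALIZE when the target
// handles it; otherwise substitute an explicit qNaN constant in VT's
// semantics.
static SDValue quietIfNaN(SelectionDAG &DAG, const TargetLowering &TLI,
                          SDValue MinMax, EVT VT, const SDLoc &DL,
                          SDNodeFlags Flags) {
  SDValue Quiet;
  if (TLI.isOperationLegalOrCustom(ISD::FCANONICALIZE, VT))
    Quiet = DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
  else
    Quiet = DAG.getConstantFP(APFloat::getQNaN(VT.getFltSemantics()), DL, VT);
  // select (MinMax unord MinMax), Quiet, MinMax
  return DAG.getSelectCC(DL, MinMax, MinMax, Quiet, MinMax, ISD::SETUO);
}

The constant path only matters on targets without FCANONICALIZE support; as the comment in the patch notes, the targets that encode qNaN/sNaN differently either support FCANONICALIZE (MIPS) or are not supported by LLVM at all (HPPA), so emitting the IEEE qNaN pattern is safe.
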
From d05bb70e810d909c44700baf56349e982eb8a93f Mon Sep 17 00:00:00 2001
From: YunQiang Su <yunqiang at isrc.iscas.ac.cn>
Date: Fri, 16 Aug 2024 16:49:47 +0800
Subject: [PATCH] SelectionDAG: Use qNaN constant if FCANONICALIZE not
LegalOrCustom
The default Action of ISD::FCANONICALIZE is Legal, but in fact most
architectures, X86 included, do not implement it.
Let's set the Action of ISD::FCANONICALIZE to Expand on X86, so that
we can correctly determine whether it is LegalOrCustom.
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 12 +-
llvm/lib/Target/X86/X86ISelLowering.cpp | 3 +
.../CodeGen/X86/fp-maximumnum-minimumnum.ll | 1363 +++++++++++++++++
3 files changed, 1376 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/fp-maximumnum-minimumnum.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 2c939967a5e1d9..7ee28d08d556c3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8622,8 +8622,16 @@ SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
   // If MinMax is NaN, let's quiet it.
   if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS) &&
       !DAG.isKnownNeverNaN(RHS)) {
-    SDValue MinMaxQuiet =
-        DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
+    SDValue MinMaxQuiet;
+    if (isOperationLegalOrCustom(ISD::FCANONICALIZE, VT)) {
+      MinMaxQuiet = DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
+    } else {
+      // MIPS pre-R5 and HPPA use a different encoding of qNaN and sNaN,
+      // but MIPS supports ISD::FCANONICALIZE (so it takes the branch above)
+      // and HPPA is not supported by LLVM yet.
+      MinMaxQuiet =
+          DAG.getConstantFP(APFloat::getQNaN(VT.getFltSemantics()), DL, VT);
+    }
     MinMax =
         DAG.getSelectCC(DL, MinMax, MinMax, MinMaxQuiet, MinMax, ISD::SETUO);
   }
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 11c9a992cbdee9..e929d98e959715 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -608,6 +608,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMAXNUM, VT, Action);
setOperationAction(ISD::FMINIMUM, VT, Action);
setOperationAction(ISD::FMAXIMUM, VT, Action);
+ setOperationAction(ISD::FCANONICALIZE, VT, Action);
setOperationAction(ISD::FSIN, VT, Action);
setOperationAction(ISD::FCOS, VT, Action);
setOperationAction(ISD::FSINCOS, VT, Action);
@@ -668,6 +669,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSIN , VT, Expand);
setOperationAction(ISD::FCOS , VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
+
+ setOperationAction(ISD::FCANONICALIZE, VT, Expand);
}
// Half type will be promoted by default.
diff --git a/llvm/test/CodeGen/X86/fp-maximumnum-minimumnum.ll b/llvm/test/CodeGen/X86/fp-maximumnum-minimumnum.ll
new file mode 100644
index 00000000000000..6dd7e582fae0b5
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fp-maximumnum-minimumnum.ll
@@ -0,0 +1,1363 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=x86_64 < %s | FileCheck %s --check-prefix=X64
+; RUN: llc --mtriple=x86_64 --mattr=+avx < %s | FileCheck %s --check-prefix=X64AVX
+; RUN: llc --mtriple=x86_64 --mattr=+avx512fp16 < %s | FileCheck %s --check-prefix=X64AVX512FP16
+
+declare float @llvm.maximumnum.f32(float, float)
+declare double @llvm.maximumnum.f64(double, double)
+declare float @llvm.minimumnum.f32(float, float)
+declare double @llvm.minimumnum.f64(double, double)
+
+define float @maximumnum_float(float %x, float %y) {
+; X64-LABEL: maximumnum_float:
+; X64: # %bb.0:
+; X64-NEXT: movaps %xmm0, %xmm2
+; X64-NEXT: cmpunordss %xmm0, %xmm2
+; X64-NEXT: movaps %xmm2, %xmm3
+; X64-NEXT: andps %xmm1, %xmm3
+; X64-NEXT: andnps %xmm0, %xmm2
+; X64-NEXT: orps %xmm3, %xmm2
+; X64-NEXT: movaps %xmm1, %xmm3
+; X64-NEXT: cmpunordss %xmm1, %xmm3
+; X64-NEXT: movaps %xmm3, %xmm0
+; X64-NEXT: andps %xmm2, %xmm0
+; X64-NEXT: andnps %xmm1, %xmm3
+; X64-NEXT: orps %xmm0, %xmm3
+; X64-NEXT: movaps %xmm3, %xmm0
+; X64-NEXT: cmpltss %xmm2, %xmm0
+; X64-NEXT: movaps %xmm0, %xmm1
+; X64-NEXT: andps %xmm2, %xmm1
+; X64-NEXT: andnps %xmm3, %xmm0
+; X64-NEXT: orps %xmm1, %xmm0
+; X64-NEXT: movaps %xmm0, %xmm1
+; X64-NEXT: cmpunordss %xmm0, %xmm1
+; X64-NEXT: movss {{.*#+}} xmm4 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64-NEXT: andps %xmm1, %xmm4
+; X64-NEXT: andnps %xmm0, %xmm1
+; X64-NEXT: orps %xmm1, %xmm4
+; X64-NEXT: xorps %xmm1, %xmm1
+; X64-NEXT: cmpeqss %xmm4, %xmm1
+; X64-NEXT: movd %xmm2, %eax
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: je .LBB0_2
+; X64-NEXT: # %bb.1:
+; X64-NEXT: movaps %xmm4, %xmm2
+; X64-NEXT: .LBB0_2:
+; X64-NEXT: movaps %xmm1, %xmm0
+; X64-NEXT: andnps %xmm4, %xmm0
+; X64-NEXT: movd %xmm3, %eax
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: je .LBB0_4
+; X64-NEXT: # %bb.3:
+; X64-NEXT: movaps %xmm2, %xmm3
+; X64-NEXT: .LBB0_4:
+; X64-NEXT: andps %xmm3, %xmm1
+; X64-NEXT: orps %xmm1, %xmm0
+; X64-NEXT: retq
+;
+; X64AVX-LABEL: maximumnum_float:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm2
+; X64AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm0
+; X64AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; X64AVX-NEXT: vcmpltss %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm3
+; X64AVX-NEXT: vblendvps %xmm3, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; X64AVX-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; X64AVX-NEXT: vmovd %xmm2, %eax
+; X64AVX-NEXT: testl %eax, %eax
+; X64AVX-NEXT: je .LBB0_2
+; X64AVX-NEXT: # %bb.1:
+; X64AVX-NEXT: vmovaps %xmm1, %xmm2
+; X64AVX-NEXT: .LBB0_2:
+; X64AVX-NEXT: vcmpeqss %xmm3, %xmm1, %xmm3
+; X64AVX-NEXT: vmovd %xmm0, %eax
+; X64AVX-NEXT: testl %eax, %eax
+; X64AVX-NEXT: je .LBB0_4
+; X64AVX-NEXT: # %bb.3:
+; X64AVX-NEXT: vmovaps %xmm2, %xmm0
+; X64AVX-NEXT: .LBB0_4:
+; X64AVX-NEXT: vblendvps %xmm3, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT: retq
+;
+; X64AVX512FP16-LABEL: maximumnum_float:
+; X64AVX512FP16: # %bb.0:
+; X64AVX512FP16-NEXT: vcmpunordss %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordss %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT: vcmpltss %xmm0, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovaps %xmm1, %xmm2
+; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordss %xmm2, %xmm2, %k1
+; X64AVX512FP16-NEXT: vmovss {{.*#+}} xmm2 {%k1} = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64AVX512FP16-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; X64AVX512FP16-NEXT: vcmpeqss %xmm3, %xmm2, %k1
+; X64AVX512FP16-NEXT: vmovd %xmm0, %eax
+; X64AVX512FP16-NEXT: testl %eax, %eax
+; X64AVX512FP16-NEXT: sete %al
+; X64AVX512FP16-NEXT: kmovd %eax, %k2
+; X64AVX512FP16-NEXT: vmovaps %xmm2, %xmm3
+; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT: vmovd %xmm1, %eax
+; X64AVX512FP16-NEXT: testl %eax, %eax
+; X64AVX512FP16-NEXT: sete %al
+; X64AVX512FP16-NEXT: kmovd %eax, %k2
+; X64AVX512FP16-NEXT: vmovss %xmm1, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT: vmovss %xmm3, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT: vmovaps %xmm2, %xmm0
+; X64AVX512FP16-NEXT: retq
+  %z = call float @llvm.maximumnum.f32(float %x, float %y)
+ ret float %z
+}
+
+define float @maximumnum_float_nsz(float %x, float %y) {
+; X64-LABEL: maximumnum_float_nsz:
+; X64: # %bb.0:
+; X64-NEXT: movaps %xmm0, %xmm2
+; X64-NEXT: cmpunordss %xmm0, %xmm2
+; X64-NEXT: movaps %xmm2, %xmm3
+; X64-NEXT: andps %xmm1, %xmm3
+; X64-NEXT: andnps %xmm0, %xmm2
+; X64-NEXT: orps %xmm3, %xmm2
+; X64-NEXT: movaps %xmm1, %xmm0
+; X64-NEXT: cmpunordss %xmm1, %xmm0
+; X64-NEXT: movaps %xmm0, %xmm3
+; X64-NEXT: andps %xmm2, %xmm3
+; X64-NEXT: andnps %xmm1, %xmm0
+; X64-NEXT: orps %xmm3, %xmm0
+; X64-NEXT: movaps %xmm0, %xmm1
+; X64-NEXT: cmpltss %xmm2, %xmm1
+; X64-NEXT: andps %xmm1, %xmm2
+; X64-NEXT: andnps %xmm0, %xmm1
+; X64-NEXT: orps %xmm2, %xmm1
+; X64-NEXT: movaps %xmm1, %xmm2
+; X64-NEXT: cmpunordss %xmm1, %xmm2
+; X64-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64-NEXT: andps %xmm2, %xmm0
+; X64-NEXT: andnps %xmm1, %xmm2
+; X64-NEXT: orps %xmm2, %xmm0
+; X64-NEXT: retq
+;
+; X64AVX-LABEL: maximumnum_float_nsz:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
+; X64AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2
+; X64AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm1
+; X64AVX-NEXT: vcmpltss %xmm0, %xmm1, %xmm2
+; X64AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm1
+; X64AVX-NEXT: vblendvps %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64AVX-NEXT: retq
+;
+; X64AVX512FP16-LABEL: maximumnum_float_nsz:
+; X64AVX512FP16: # %bb.0:
+; X64AVX512FP16-NEXT: vcmpunordss %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordss %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT: vcmpltss %xmm0, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordss %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovss {{.*#+}} xmm1 {%k1} = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64AVX512FP16-NEXT: vmovaps %xmm1, %xmm0
+; X64AVX512FP16-NEXT: retq
+ %z = call nsz float @llvm.maximumnum.f32(float %x, float %y)
+ ret float %z
+}
+
+define float @maximumnum_float_nnan(float %x, float %y) {
+; X64-LABEL: maximumnum_float_nnan:
+; X64: # %bb.0:
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: js .LBB2_1
+; X64-NEXT: # %bb.2:
+; X64-NEXT: movdqa %xmm0, %xmm2
+; X64-NEXT: jmp .LBB2_3
+; X64-NEXT: .LBB2_1:
+; X64-NEXT: movdqa %xmm1, %xmm2
+; X64-NEXT: movdqa %xmm0, %xmm1
+; X64-NEXT: .LBB2_3:
+; X64-NEXT: maxss %xmm2, %xmm1
+; X64-NEXT: movaps %xmm1, %xmm0
+; X64-NEXT: retq
+;
+; X64AVX-LABEL: maximumnum_float_nnan:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: vmovd %xmm0, %eax
+; X64AVX-NEXT: testl %eax, %eax
+; X64AVX-NEXT: js .LBB2_1
+; X64AVX-NEXT: # %bb.2:
+; X64AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT: retq
+; X64AVX-NEXT: .LBB2_1:
+; X64AVX-NEXT: vmovaps %xmm1, %xmm2
+; X64AVX-NEXT: vmaxss %xmm2, %xmm0, %xmm0
+; X64AVX-NEXT: retq
+;
+; X64AVX512FP16-LABEL: maximumnum_float_nnan:
+; X64AVX512FP16: # %bb.0:
+; X64AVX512FP16-NEXT: vfpclassss $3, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovaps %xmm1, %xmm2
+; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT: vmaxss %xmm2, %xmm0, %xmm0
+; X64AVX512FP16-NEXT: retq
+ %z = call nnan float @llvm.maximumnum.f32(float %x, float %y)
+ ret float %z
+}
+
+
+define double @maximumnum_double(double %x, double %y) {
+; X64-LABEL: maximumnum_double:
+; X64: # %bb.0:
+; X64-NEXT: movapd %xmm0, %xmm2
+; X64-NEXT: cmpunordsd %xmm0, %xmm2
+; X64-NEXT: movapd %xmm2, %xmm3
+; X64-NEXT: andpd %xmm1, %xmm3
+; X64-NEXT: andnpd %xmm0, %xmm2
+; X64-NEXT: orpd %xmm3, %xmm2
+; X64-NEXT: movapd %xmm1, %xmm3
+; X64-NEXT: cmpunordsd %xmm1, %xmm3
+; X64-NEXT: movapd %xmm3, %xmm0
+; X64-NEXT: andpd %xmm2, %xmm0
+; X64-NEXT: andnpd %xmm1, %xmm3
+; X64-NEXT: orpd %xmm0, %xmm3
+; X64-NEXT: movapd %xmm3, %xmm0
+; X64-NEXT: cmpltsd %xmm2, %xmm0
+; X64-NEXT: movapd %xmm0, %xmm1
+; X64-NEXT: andpd %xmm2, %xmm1
+; X64-NEXT: andnpd %xmm3, %xmm0
+; X64-NEXT: orpd %xmm1, %xmm0
+; X64-NEXT: movapd %xmm0, %xmm1
+; X64-NEXT: cmpunordsd %xmm0, %xmm1
+; X64-NEXT: movsd {{.*#+}} xmm4 = [NaN,0.0E+0]
+; X64-NEXT: andpd %xmm1, %xmm4
+; X64-NEXT: andnpd %xmm0, %xmm1
+; X64-NEXT: orpd %xmm1, %xmm4
+; X64-NEXT: xorpd %xmm1, %xmm1
+; X64-NEXT: cmpeqsd %xmm4, %xmm1
+; X64-NEXT: movq %xmm2, %rax
+; X64-NEXT: testq %rax, %rax
+; X64-NEXT: je .LBB3_2
+; X64-NEXT: # %bb.1:
+; X64-NEXT: movapd %xmm4, %xmm2
+; X64-NEXT: .LBB3_2:
+; X64-NEXT: movapd %xmm1, %xmm0
+; X64-NEXT: andnpd %xmm4, %xmm0
+; X64-NEXT: movq %xmm3, %rax
+; X64-NEXT: testq %rax, %rax
+; X64-NEXT: je .LBB3_4
+; X64-NEXT: # %bb.3:
+; X64-NEXT: movapd %xmm2, %xmm3
+; X64-NEXT: .LBB3_4:
+; X64-NEXT: andpd %xmm3, %xmm1
+; X64-NEXT: orpd %xmm1, %xmm0
+; X64-NEXT: retq
+;
+; X64AVX-LABEL: maximumnum_double:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm2
+; X64AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm0
+; X64AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
+; X64AVX-NEXT: vcmpltsd %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT: vblendvpd %xmm1, %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm3
+; X64AVX-NEXT: vblendvpd %xmm3, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; X64AVX-NEXT: vxorpd %xmm3, %xmm3, %xmm3
+; X64AVX-NEXT: vmovq %xmm2, %rax
+; X64AVX-NEXT: testq %rax, %rax
+; X64AVX-NEXT: je .LBB3_2
+; X64AVX-NEXT: # %bb.1:
+; X64AVX-NEXT: vmovapd %xmm1, %xmm2
+; X64AVX-NEXT: .LBB3_2:
+; X64AVX-NEXT: vcmpeqsd %xmm3, %xmm1, %xmm3
+; X64AVX-NEXT: vmovq %xmm0, %rax
+; X64AVX-NEXT: testq %rax, %rax
+; X64AVX-NEXT: je .LBB3_4
+; X64AVX-NEXT: # %bb.3:
+; X64AVX-NEXT: vmovapd %xmm2, %xmm0
+; X64AVX-NEXT: .LBB3_4:
+; X64AVX-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT: retq
+;
+; X64AVX512FP16-LABEL: maximumnum_double:
+; X64AVX512FP16: # %bb.0:
+; X64AVX512FP16-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordsd %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT: vcmpltsd %xmm0, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovapd %xmm1, %xmm2
+; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordsd %xmm2, %xmm2, %k1
+; X64AVX512FP16-NEXT: vmovsd {{.*#+}} xmm2 {%k1} = [NaN,0.0E+0]
+; X64AVX512FP16-NEXT: vxorpd %xmm3, %xmm3, %xmm3
+; X64AVX512FP16-NEXT: vcmpeqsd %xmm3, %xmm2, %k1
+; X64AVX512FP16-NEXT: vmovq %xmm0, %rax
+; X64AVX512FP16-NEXT: testq %rax, %rax
+; X64AVX512FP16-NEXT: sete %al
+; X64AVX512FP16-NEXT: kmovd %eax, %k2
+; X64AVX512FP16-NEXT: vmovapd %xmm2, %xmm3
+; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT: vmovq %xmm1, %rax
+; X64AVX512FP16-NEXT: testq %rax, %rax
+; X64AVX512FP16-NEXT: sete %al
+; X64AVX512FP16-NEXT: kmovd %eax, %k2
+; X64AVX512FP16-NEXT: vmovsd %xmm1, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT: vmovsd %xmm3, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT: vmovapd %xmm2, %xmm0
+; X64AVX512FP16-NEXT: retq
+ %z = call double @llvm.maximumnum.f64(double %x, double %y)
+ ret double %z
+}
+
+define double @maximumnum_double_nsz(double %x, double %y) {
+; X64-LABEL: maximumnum_double_nsz:
+; X64: # %bb.0:
+; X64-NEXT: movapd %xmm0, %xmm2
+; X64-NEXT: cmpunordsd %xmm0, %xmm2
+; X64-NEXT: movapd %xmm2, %xmm3
+; X64-NEXT: andpd %xmm1, %xmm3
+; X64-NEXT: andnpd %xmm0, %xmm2
+; X64-NEXT: orpd %xmm3, %xmm2
+; X64-NEXT: movapd %xmm1, %xmm0
+; X64-NEXT: cmpunordsd %xmm1, %xmm0
+; X64-NEXT: movapd %xmm0, %xmm3
+; X64-NEXT: andpd %xmm2, %xmm3
+; X64-NEXT: andnpd %xmm1, %xmm0
+; X64-NEXT: orpd %xmm3, %xmm0
+; X64-NEXT: movapd %xmm0, %xmm1
+; X64-NEXT: cmpltsd %xmm2, %xmm1
+; X64-NEXT: andpd %xmm1, %xmm2
+; X64-NEXT: andnpd %xmm0, %xmm1
+; X64-NEXT: orpd %xmm2, %xmm1
+; X64-NEXT: movapd %xmm1, %xmm2
+; X64-NEXT: cmpunordsd %xmm1, %xmm2
+; X64-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
+; X64-NEXT: andpd %xmm2, %xmm0
+; X64-NEXT: andnpd %xmm1, %xmm2
+; X64-NEXT: orpd %xmm2, %xmm0
+; X64-NEXT: retq
+;
+; X64AVX-LABEL: maximumnum_double_nsz:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; X64AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm2
+; X64AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
+; X64AVX-NEXT: vcmpltsd %xmm0, %xmm1, %xmm2
+; X64AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm1
+; X64AVX-NEXT: vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64AVX-NEXT: retq
+;
+; X64AVX512FP16-LABEL: maximumnum_double_nsz:
+; X64AVX512FP16: # %bb.0:
+; X64AVX512FP16-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordsd %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT: vcmpltsd %xmm0, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordsd %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovsd {{.*#+}} xmm1 {%k1} = [NaN,0.0E+0]
+; X64AVX512FP16-NEXT: vmovapd %xmm1, %xmm0
+; X64AVX512FP16-NEXT: retq
+ %z = call nsz double @llvm.maximumnum.f64(double %x, double %y)
+ ret double %z
+}
+
+define double @maximumnum_double_nnan(double %x, double %y) {
+; X64-LABEL: maximumnum_double_nnan:
+; X64: # %bb.0:
+; X64-NEXT: movq %xmm0, %rax
+; X64-NEXT: testq %rax, %rax
+; X64-NEXT: js .LBB5_1
+; X64-NEXT: # %bb.2:
+; X64-NEXT: movdqa %xmm0, %xmm2
+; X64-NEXT: jmp .LBB5_3
+; X64-NEXT: .LBB5_1:
+; X64-NEXT: movdqa %xmm1, %xmm2
+; X64-NEXT: movdqa %xmm0, %xmm1
+; X64-NEXT: .LBB5_3:
+; X64-NEXT: maxsd %xmm2, %xmm1
+; X64-NEXT: movapd %xmm1, %xmm0
+; X64-NEXT: retq
+;
+; X64AVX-LABEL: maximumnum_double_nnan:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: vmovq %xmm0, %rax
+; X64AVX-NEXT: testq %rax, %rax
+; X64AVX-NEXT: js .LBB5_1
+; X64AVX-NEXT: # %bb.2:
+; X64AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT: retq
+; X64AVX-NEXT: .LBB5_1:
+; X64AVX-NEXT: vmovapd %xmm1, %xmm2
+; X64AVX-NEXT: vmaxsd %xmm2, %xmm0, %xmm0
+; X64AVX-NEXT: retq
+;
+; X64AVX512FP16-LABEL: maximumnum_double_nnan:
+; X64AVX512FP16: # %bb.0:
+; X64AVX512FP16-NEXT: vfpclasssd $3, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovapd %xmm1, %xmm2
+; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT: vmaxsd %xmm2, %xmm0, %xmm0
+; X64AVX512FP16-NEXT: retq
+ %z = call nnan double @llvm.maximumnum.f64(double %x, double %y)
+ ret double %z
+}
+
+define float @minimumnum_float(float %x, float %y) {
+; X64-LABEL: minimumnum_float:
+; X64: # %bb.0:
+; X64-NEXT: movaps %xmm0, %xmm2
+; X64-NEXT: cmpunordss %xmm0, %xmm2
+; X64-NEXT: movaps %xmm2, %xmm3
+; X64-NEXT: andps %xmm1, %xmm3
+; X64-NEXT: andnps %xmm0, %xmm2
+; X64-NEXT: orps %xmm3, %xmm2
+; X64-NEXT: movaps %xmm1, %xmm3
+; X64-NEXT: cmpunordss %xmm1, %xmm3
+; X64-NEXT: movaps %xmm3, %xmm0
+; X64-NEXT: andps %xmm2, %xmm0
+; X64-NEXT: andnps %xmm1, %xmm3
+; X64-NEXT: orps %xmm0, %xmm3
+; X64-NEXT: movaps %xmm2, %xmm0
+; X64-NEXT: cmpltss %xmm3, %xmm0
+; X64-NEXT: movaps %xmm0, %xmm1
+; X64-NEXT: andps %xmm2, %xmm1
+; X64-NEXT: andnps %xmm3, %xmm0
+; X64-NEXT: orps %xmm1, %xmm0
+; X64-NEXT: movaps %xmm0, %xmm1
+; X64-NEXT: cmpunordss %xmm0, %xmm1
+; X64-NEXT: movss {{.*#+}} xmm4 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64-NEXT: andps %xmm1, %xmm4
+; X64-NEXT: andnps %xmm0, %xmm1
+; X64-NEXT: orps %xmm1, %xmm4
+; X64-NEXT: xorps %xmm1, %xmm1
+; X64-NEXT: cmpeqss %xmm4, %xmm1
+; X64-NEXT: movd %xmm2, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: jo .LBB6_2
+; X64-NEXT: # %bb.1:
+; X64-NEXT: movaps %xmm4, %xmm2
+; X64-NEXT: .LBB6_2:
+; X64-NEXT: movaps %xmm1, %xmm0
+; X64-NEXT: andnps %xmm4, %xmm0
+; X64-NEXT: movd %xmm3, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: jo .LBB6_4
+; X64-NEXT: # %bb.3:
+; X64-NEXT: movaps %xmm2, %xmm3
+; X64-NEXT: .LBB6_4:
+; X64-NEXT: andps %xmm3, %xmm1
+; X64-NEXT: orps %xmm1, %xmm0
+; X64-NEXT: retq
+;
+; X64AVX-LABEL: minimumnum_float:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm2
+; X64AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm0
+; X64AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; X64AVX-NEXT: vcmpltss %xmm0, %xmm2, %xmm1
+; X64AVX-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm3
+; X64AVX-NEXT: vblendvps %xmm3, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; X64AVX-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; X64AVX-NEXT: vmovd %xmm2, %eax
+; X64AVX-NEXT: negl %eax
+; X64AVX-NEXT: jo .LBB6_2
+; X64AVX-NEXT: # %bb.1:
+; X64AVX-NEXT: vmovaps %xmm1, %xmm2
+; X64AVX-NEXT: .LBB6_2:
+; X64AVX-NEXT: vcmpeqss %xmm3, %xmm1, %xmm3
+; X64AVX-NEXT: vmovd %xmm0, %eax
+; X64AVX-NEXT: negl %eax
+; X64AVX-NEXT: jo .LBB6_4
+; X64AVX-NEXT: # %bb.3:
+; X64AVX-NEXT: vmovaps %xmm2, %xmm0
+; X64AVX-NEXT: .LBB6_4:
+; X64AVX-NEXT: vblendvps %xmm3, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT: retq
+;
+; X64AVX512FP16-LABEL: minimumnum_float:
+; X64AVX512FP16: # %bb.0:
+; X64AVX512FP16-NEXT: vcmpunordss %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordss %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT: vcmpltss %xmm1, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovaps %xmm1, %xmm2
+; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordss %xmm2, %xmm2, %k1
+; X64AVX512FP16-NEXT: vmovss {{.*#+}} xmm2 {%k1} = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64AVX512FP16-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; X64AVX512FP16-NEXT: vcmpeqss %xmm3, %xmm2, %k1
+; X64AVX512FP16-NEXT: vmovd %xmm0, %eax
+; X64AVX512FP16-NEXT: negl %eax
+; X64AVX512FP16-NEXT: seto %al
+; X64AVX512FP16-NEXT: kmovd %eax, %k2
+; X64AVX512FP16-NEXT: vmovaps %xmm2, %xmm3
+; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT: vmovd %xmm1, %eax
+; X64AVX512FP16-NEXT: negl %eax
+; X64AVX512FP16-NEXT: seto %al
+; X64AVX512FP16-NEXT: kmovd %eax, %k2
+; X64AVX512FP16-NEXT: vmovss %xmm1, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT: vmovss %xmm3, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT: vmovaps %xmm2, %xmm0
+; X64AVX512FP16-NEXT: retq
+ %z = call float @llvm.minimumnum.f32(float %x, float %y)
+ ret float %z
+}
+
+define float @minimumnum_float_nsz(float %x, float %y) {
+; X64-LABEL: minimumnum_float_nsz:
+; X64: # %bb.0:
+; X64-NEXT: movaps %xmm0, %xmm2
+; X64-NEXT: cmpunordss %xmm0, %xmm2
+; X64-NEXT: movaps %xmm2, %xmm3
+; X64-NEXT: andps %xmm1, %xmm3
+; X64-NEXT: andnps %xmm0, %xmm2
+; X64-NEXT: orps %xmm3, %xmm2
+; X64-NEXT: movaps %xmm1, %xmm0
+; X64-NEXT: cmpunordss %xmm1, %xmm0
+; X64-NEXT: movaps %xmm0, %xmm3
+; X64-NEXT: andps %xmm2, %xmm3
+; X64-NEXT: andnps %xmm1, %xmm0
+; X64-NEXT: orps %xmm3, %xmm0
+; X64-NEXT: movaps %xmm2, %xmm1
+; X64-NEXT: cmpltss %xmm0, %xmm1
+; X64-NEXT: andps %xmm1, %xmm2
+; X64-NEXT: andnps %xmm0, %xmm1
+; X64-NEXT: orps %xmm2, %xmm1
+; X64-NEXT: movaps %xmm1, %xmm2
+; X64-NEXT: cmpunordss %xmm1, %xmm2
+; X64-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64-NEXT: andps %xmm2, %xmm0
+; X64-NEXT: andnps %xmm1, %xmm2
+; X64-NEXT: orps %xmm2, %xmm0
+; X64-NEXT: retq
+;
+; X64AVX-LABEL: minimumnum_float_nsz:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
+; X64AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2
+; X64AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm1
+; X64AVX-NEXT: vcmpltss %xmm1, %xmm0, %xmm2
+; X64AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm1
+; X64AVX-NEXT: vblendvps %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64AVX-NEXT: retq
+;
+; X64AVX512FP16-LABEL: minimumnum_float_nsz:
+; X64AVX512FP16: # %bb.0:
+; X64AVX512FP16-NEXT: vcmpunordss %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordss %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT: vcmpltss %xmm1, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordss %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovss {{.*#+}} xmm1 {%k1} = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64AVX512FP16-NEXT: vmovaps %xmm1, %xmm0
+; X64AVX512FP16-NEXT: retq
+ %z = call nsz float @llvm.minimumnum.f32(float %x, float %y)
+ ret float %z
+}
+
+define float @minimumnum_float_nnan(float %x, float %y) {
+; X64-LABEL: minimumnum_float_nnan:
+; X64: # %bb.0:
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: js .LBB8_1
+; X64-NEXT: # %bb.2:
+; X64-NEXT: minss %xmm1, %xmm0
+; X64-NEXT: retq
+; X64-NEXT: .LBB8_1:
+; X64-NEXT: movdqa %xmm0, %xmm2
+; X64-NEXT: movaps %xmm1, %xmm0
+; X64-NEXT: minss %xmm2, %xmm0
+; X64-NEXT: retq
+;
+; X64AVX-LABEL: minimumnum_float_nnan:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: vmovd %xmm0, %eax
+; X64AVX-NEXT: testl %eax, %eax
+; X64AVX-NEXT: js .LBB8_1
+; X64AVX-NEXT: # %bb.2:
+; X64AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
+; X64AVX-NEXT: retq
+; X64AVX-NEXT: .LBB8_1:
+; X64AVX-NEXT: vmovdqa %xmm0, %xmm2
+; X64AVX-NEXT: vminss %xmm2, %xmm1, %xmm0
+; X64AVX-NEXT: retq
+;
+; X64AVX512FP16-LABEL: minimumnum_float_nnan:
+; X64AVX512FP16: # %bb.0:
+; X64AVX512FP16-NEXT: vfpclassss $5, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovaps %xmm1, %xmm2
+; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT: vminss %xmm2, %xmm0, %xmm0
+; X64AVX512FP16-NEXT: retq
+ %z = call nnan float @llvm.minimumnum.f32(float %x, float %y)
+ ret float %z
+}
+
+define double @minimumnum_double(double %x, double %y) {
+; X64-LABEL: minimumnum_double:
+; X64: # %bb.0:
+; X64-NEXT: movapd %xmm0, %xmm2
+; X64-NEXT: cmpunordsd %xmm0, %xmm2
+; X64-NEXT: movapd %xmm2, %xmm3
+; X64-NEXT: andpd %xmm1, %xmm3
+; X64-NEXT: andnpd %xmm0, %xmm2
+; X64-NEXT: orpd %xmm3, %xmm2
+; X64-NEXT: movapd %xmm1, %xmm3
+; X64-NEXT: cmpunordsd %xmm1, %xmm3
+; X64-NEXT: movapd %xmm3, %xmm0
+; X64-NEXT: andpd %xmm2, %xmm0
+; X64-NEXT: andnpd %xmm1, %xmm3
+; X64-NEXT: orpd %xmm0, %xmm3
+; X64-NEXT: movapd %xmm2, %xmm0
+; X64-NEXT: cmpltsd %xmm3, %xmm0
+; X64-NEXT: movapd %xmm0, %xmm1
+; X64-NEXT: andpd %xmm2, %xmm1
+; X64-NEXT: andnpd %xmm3, %xmm0
+; X64-NEXT: orpd %xmm1, %xmm0
+; X64-NEXT: movapd %xmm0, %xmm1
+; X64-NEXT: cmpunordsd %xmm0, %xmm1
+; X64-NEXT: movsd {{.*#+}} xmm4 = [NaN,0.0E+0]
+; X64-NEXT: andpd %xmm1, %xmm4
+; X64-NEXT: andnpd %xmm0, %xmm1
+; X64-NEXT: orpd %xmm1, %xmm4
+; X64-NEXT: xorpd %xmm1, %xmm1
+; X64-NEXT: cmpeqsd %xmm4, %xmm1
+; X64-NEXT: movq %xmm2, %rax
+; X64-NEXT: negq %rax
+; X64-NEXT: jo .LBB9_2
+; X64-NEXT: # %bb.1:
+; X64-NEXT: movapd %xmm4, %xmm2
+; X64-NEXT: .LBB9_2:
+; X64-NEXT: movapd %xmm1, %xmm0
+; X64-NEXT: andnpd %xmm4, %xmm0
+; X64-NEXT: movq %xmm3, %rax
+; X64-NEXT: negq %rax
+; X64-NEXT: jo .LBB9_4
+; X64-NEXT: # %bb.3:
+; X64-NEXT: movapd %xmm2, %xmm3
+; X64-NEXT: .LBB9_4:
+; X64-NEXT: andpd %xmm3, %xmm1
+; X64-NEXT: orpd %xmm1, %xmm0
+; X64-NEXT: retq
+;
+; X64AVX-LABEL: minimumnum_double:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm2
+; X64AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm0
+; X64AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
+; X64AVX-NEXT: vcmpltsd %xmm0, %xmm2, %xmm1
+; X64AVX-NEXT: vblendvpd %xmm1, %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm3
+; X64AVX-NEXT: vblendvpd %xmm3, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; X64AVX-NEXT: vxorpd %xmm3, %xmm3, %xmm3
+; X64AVX-NEXT: vmovq %xmm2, %rax
+; X64AVX-NEXT: negq %rax
+; X64AVX-NEXT: jo .LBB9_2
+; X64AVX-NEXT: # %bb.1:
+; X64AVX-NEXT: vmovapd %xmm1, %xmm2
+; X64AVX-NEXT: .LBB9_2:
+; X64AVX-NEXT: vcmpeqsd %xmm3, %xmm1, %xmm3
+; X64AVX-NEXT: vmovq %xmm0, %rax
+; X64AVX-NEXT: negq %rax
+; X64AVX-NEXT: jo .LBB9_4
+; X64AVX-NEXT: # %bb.3:
+; X64AVX-NEXT: vmovapd %xmm2, %xmm0
+; X64AVX-NEXT: .LBB9_4:
+; X64AVX-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT: retq
+;
+; X64AVX512FP16-LABEL: minimumnum_double:
+; X64AVX512FP16: # %bb.0:
+; X64AVX512FP16-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordsd %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT: vcmpltsd %xmm1, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovapd %xmm1, %xmm2
+; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordsd %xmm2, %xmm2, %k1
+; X64AVX512FP16-NEXT: vmovsd {{.*#+}} xmm2 {%k1} = [NaN,0.0E+0]
+; X64AVX512FP16-NEXT: vxorpd %xmm3, %xmm3, %xmm3
+; X64AVX512FP16-NEXT: vcmpeqsd %xmm3, %xmm2, %k1
+; X64AVX512FP16-NEXT: vmovq %xmm0, %rax
+; X64AVX512FP16-NEXT: negq %rax
+; X64AVX512FP16-NEXT: seto %al
+; X64AVX512FP16-NEXT: kmovd %eax, %k2
+; X64AVX512FP16-NEXT: vmovapd %xmm2, %xmm3
+; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT: vmovq %xmm1, %rax
+; X64AVX512FP16-NEXT: negq %rax
+; X64AVX512FP16-NEXT: seto %al
+; X64AVX512FP16-NEXT: kmovd %eax, %k2
+; X64AVX512FP16-NEXT: vmovsd %xmm1, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT: vmovsd %xmm3, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT: vmovapd %xmm2, %xmm0
+; X64AVX512FP16-NEXT: retq
+ %z = call double @llvm.minimumnum.f64(double %x, double %y)
+ ret double %z
+}
+
+define double @minimumnum_double_nsz(double %x, double %y) {
+; X64-LABEL: minimumnum_double_nsz:
+; X64: # %bb.0:
+; X64-NEXT: movapd %xmm0, %xmm2
+; X64-NEXT: cmpunordsd %xmm0, %xmm2
+; X64-NEXT: movapd %xmm2, %xmm3
+; X64-NEXT: andpd %xmm1, %xmm3
+; X64-NEXT: andnpd %xmm0, %xmm2
+; X64-NEXT: orpd %xmm3, %xmm2
+; X64-NEXT: movapd %xmm1, %xmm0
+; X64-NEXT: cmpunordsd %xmm1, %xmm0
+; X64-NEXT: movapd %xmm0, %xmm3
+; X64-NEXT: andpd %xmm2, %xmm3
+; X64-NEXT: andnpd %xmm1, %xmm0
+; X64-NEXT: orpd %xmm3, %xmm0
+; X64-NEXT: movapd %xmm2, %xmm1
+; X64-NEXT: cmpltsd %xmm0, %xmm1
+; X64-NEXT: andpd %xmm1, %xmm2
+; X64-NEXT: andnpd %xmm0, %xmm1
+; X64-NEXT: orpd %xmm2, %xmm1
+; X64-NEXT: movapd %xmm1, %xmm2
+; X64-NEXT: cmpunordsd %xmm1, %xmm2
+; X64-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
+; X64-NEXT: andpd %xmm2, %xmm0
+; X64-NEXT: andnpd %xmm1, %xmm2
+; X64-NEXT: orpd %xmm2, %xmm0
+; X64-NEXT: retq
+;
+; X64AVX-LABEL: minimumnum_double_nsz:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; X64AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm2
+; X64AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
+; X64AVX-NEXT: vcmpltsd %xmm1, %xmm0, %xmm2
+; X64AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm1
+; X64AVX-NEXT: vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64AVX-NEXT: retq
+;
+; X64AVX512FP16-LABEL: minimumnum_double_nsz:
+; X64AVX512FP16: # %bb.0:
+; X64AVX512FP16-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordsd %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT: vcmpltsd %xmm1, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordsd %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovsd {{.*#+}} xmm1 {%k1} = [NaN,0.0E+0]
+; X64AVX512FP16-NEXT: vmovapd %xmm1, %xmm0
+; X64AVX512FP16-NEXT: retq
+ %z = call nsz double @llvm.minimumnum.f64(double %x, double %y)
+ ret double %z
+}
+
+define double @minimumnum_double_nnan(double %x, double %y) {
+; X64-LABEL: minimumnum_double_nnan:
+; X64: # %bb.0:
+; X64-NEXT: movq %xmm0, %rax
+; X64-NEXT: testq %rax, %rax
+; X64-NEXT: js .LBB11_1
+; X64-NEXT: # %bb.2:
+; X64-NEXT: minsd %xmm1, %xmm0
+; X64-NEXT: retq
+; X64-NEXT: .LBB11_1:
+; X64-NEXT: movdqa %xmm0, %xmm2
+; X64-NEXT: movapd %xmm1, %xmm0
+; X64-NEXT: minsd %xmm2, %xmm0
+; X64-NEXT: retq
+;
+; X64AVX-LABEL: minimumnum_double_nnan:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: vmovq %xmm0, %rax
+; X64AVX-NEXT: testq %rax, %rax
+; X64AVX-NEXT: js .LBB11_1
+; X64AVX-NEXT: # %bb.2:
+; X64AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
+; X64AVX-NEXT: retq
+; X64AVX-NEXT: .LBB11_1:
+; X64AVX-NEXT: vmovdqa %xmm0, %xmm2
+; X64AVX-NEXT: vminsd %xmm2, %xmm1, %xmm0
+; X64AVX-NEXT: retq
+;
+; X64AVX512FP16-LABEL: minimumnum_double_nnan:
+; X64AVX512FP16: # %bb.0:
+; X64AVX512FP16-NEXT: vfpclasssd $5, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovapd %xmm1, %xmm2
+; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT: vminsd %xmm2, %xmm0, %xmm0
+; X64AVX512FP16-NEXT: retq
+ %z = call nnan double @llvm.minimumnum.f64(double %x, double %y)
+ ret double %z
+}
+
+define half @minimumnum_half(half %x, half %y) {
+; X64-LABEL: minimumnum_half:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbp
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: pushq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 24
+; X64-NEXT: subq $56, %rsp
+; X64-NEXT: .cfi_def_cfa_offset 80
+; X64-NEXT: .cfi_offset %rbx, -24
+; X64-NEXT: .cfi_offset %rbp, -16
+; X64-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: ucomiss %xmm0, %xmm0
+; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: jp .LBB12_2
+; X64-NEXT: # %bb.1:
+; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: .LBB12_2:
+; X64-NEXT: callq __truncsfhf2 at PLT
+; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: ucomiss %xmm0, %xmm0
+; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: jp .LBB12_4
+; X64-NEXT: # %bb.3:
+; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: .LBB12_4:
+; X64-NEXT: callq __truncsfhf2 at PLT
+; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: jb .LBB12_6
+; X64-NEXT: # %bb.5:
+; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: .LBB12_6:
+; X64-NEXT: callq __truncsfhf2 at PLT
+; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: ucomiss %xmm0, %xmm0
+; X64-NEXT: jp .LBB12_7
+; X64-NEXT: # %bb.8:
+; X64-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: jmp .LBB12_9
+; X64-NEXT: .LBB12_7:
+; X64-NEXT: movd {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64-NEXT: .LBB12_9:
+; X64-NEXT: callq __truncsfhf2 at PLT
+; X64-NEXT: pextrw $0, %xmm0, %ebx
+; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-NEXT: pextrw $0, %xmm1, %eax
+; X64-NEXT: movzwl %ax, %ecx
+; X64-NEXT: cmpl $32768, %ecx # imm = 0x8000
+; X64-NEXT: cmovnel %ebx, %eax
+; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-NEXT: pextrw $0, %xmm1, %ebp
+; X64-NEXT: movzwl %bp, %ecx
+; X64-NEXT: cmpl $32768, %ecx # imm = 0x8000
+; X64-NEXT: cmovnel %eax, %ebp
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: pxor %xmm1, %xmm1
+; X64-NEXT: ucomiss %xmm1, %xmm0
+; X64-NEXT: cmovnel %ebx, %ebp
+; X64-NEXT: pinsrw $0, %ebp, %xmm0
+; X64-NEXT: addq $56, %rsp
+; X64-NEXT: .cfi_def_cfa_offset 24
+; X64-NEXT: popq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: popq %rbp
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+;
+; X64AVX-LABEL: minimumnum_half:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: pushq %rbp
+; X64AVX-NEXT: .cfi_def_cfa_offset 16
+; X64AVX-NEXT: pushq %rbx
+; X64AVX-NEXT: .cfi_def_cfa_offset 24
+; X64AVX-NEXT: subq $56, %rsp
+; X64AVX-NEXT: .cfi_def_cfa_offset 80
+; X64AVX-NEXT: .cfi_offset %rbx, -24
+; X64AVX-NEXT: .cfi_offset %rbp, -16
+; X64AVX-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vucomiss %xmm0, %xmm0
+; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: jp .LBB12_2
+; X64AVX-NEXT: # %bb.1:
+; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: .LBB12_2:
+; X64AVX-NEXT: callq __truncsfhf2 at PLT
+; X64AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vucomiss %xmm0, %xmm0
+; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: jp .LBB12_4
+; X64AVX-NEXT: # %bb.3:
+; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: .LBB12_4:
+; X64AVX-NEXT: callq __truncsfhf2 at PLT
+; X64AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: jb .LBB12_6
+; X64AVX-NEXT: # %bb.5:
+; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: .LBB12_6:
+; X64AVX-NEXT: callq __truncsfhf2 at PLT
+; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vucomiss %xmm0, %xmm0
+; X64AVX-NEXT: jp .LBB12_7
+; X64AVX-NEXT: # %bb.8:
+; X64AVX-NEXT: vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: jmp .LBB12_9
+; X64AVX-NEXT: .LBB12_7:
+; X64AVX-NEXT: vmovd {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64AVX-NEXT: .LBB12_9:
+; X64AVX-NEXT: callq __truncsfhf2 at PLT
+; X64AVX-NEXT: vpextrw $0, %xmm0, %ebx
+; X64AVX-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64AVX-NEXT: vpextrw $0, %xmm1, %eax
+; X64AVX-NEXT: movzwl %ax, %ecx
+; X64AVX-NEXT: cmpl $32768, %ecx # imm = 0x8000
+; X64AVX-NEXT: cmovnel %ebx, %eax
+; X64AVX-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64AVX-NEXT: vpextrw $0, %xmm1, %ebp
+; X64AVX-NEXT: movzwl %bp, %ecx
+; X64AVX-NEXT: cmpl $32768, %ecx # imm = 0x8000
+; X64AVX-NEXT: cmovnel %eax, %ebp
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64AVX-NEXT: vucomiss %xmm1, %xmm0
+; X64AVX-NEXT: cmovnel %ebx, %ebp
+; X64AVX-NEXT: vpinsrw $0, %ebp, %xmm0, %xmm0
+; X64AVX-NEXT: addq $56, %rsp
+; X64AVX-NEXT: .cfi_def_cfa_offset 24
+; X64AVX-NEXT: popq %rbx
+; X64AVX-NEXT: .cfi_def_cfa_offset 16
+; X64AVX-NEXT: popq %rbp
+; X64AVX-NEXT: .cfi_def_cfa_offset 8
+; X64AVX-NEXT: retq
+;
+; X64AVX512FP16-LABEL: minimumnum_half:
+; X64AVX512FP16: # %bb.0:
+; X64AVX512FP16-NEXT: vcmpunordsh %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordsh %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1}
+; X64AVX512FP16-NEXT: vcmpltsh %xmm1, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovaps %xmm1, %xmm2
+; X64AVX512FP16-NEXT: vmovsh %xmm0, %xmm0, %xmm2 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordsh %xmm2, %xmm2, %k1
+; X64AVX512FP16-NEXT: vmovsh {{.*#+}} xmm3 = [NaN,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X64AVX512FP16-NEXT: vmovsh %xmm3, %xmm0, %xmm2 {%k1}
+; X64AVX512FP16-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; X64AVX512FP16-NEXT: vcmpeqsh %xmm3, %xmm2, %k1
+; X64AVX512FP16-NEXT: vmovw %xmm0, %eax
+; X64AVX512FP16-NEXT: movzwl %ax, %eax
+; X64AVX512FP16-NEXT: cmpl $32768, %eax # imm = 0x8000
+; X64AVX512FP16-NEXT: sete %al
+; X64AVX512FP16-NEXT: kmovd %eax, %k2
+; X64AVX512FP16-NEXT: vmovaps %xmm2, %xmm3
+; X64AVX512FP16-NEXT: vmovsh %xmm0, %xmm0, %xmm3 {%k2}
+; X64AVX512FP16-NEXT: vmovw %xmm1, %eax
+; X64AVX512FP16-NEXT: movzwl %ax, %eax
+; X64AVX512FP16-NEXT: cmpl $32768, %eax # imm = 0x8000
+; X64AVX512FP16-NEXT: sete %al
+; X64AVX512FP16-NEXT: kmovd %eax, %k2
+; X64AVX512FP16-NEXT: vmovsh %xmm1, %xmm0, %xmm3 {%k2}
+; X64AVX512FP16-NEXT: vmovsh %xmm3, %xmm0, %xmm2 {%k1}
+; X64AVX512FP16-NEXT: vmovaps %xmm2, %xmm0
+; X64AVX512FP16-NEXT: retq
+ %z = call half @llvm.minimumnum.f16(half %x, half %y)
+ ret half %z
+}
+
+define half @minimumnum_half_nsz(half %x, half %y) {
+; X64-LABEL: minimumnum_half_nsz:
+; X64: # %bb.0:
+; X64-NEXT: subq $24, %rsp
+; X64-NEXT: .cfi_def_cfa_offset 32
+; X64-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: ucomiss %xmm0, %xmm0
+; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: jp .LBB13_2
+; X64-NEXT: # %bb.1:
+; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: .LBB13_2:
+; X64-NEXT: callq __truncsfhf2 at PLT
+; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: ucomiss %xmm0, %xmm0
+; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: jp .LBB13_4
+; X64-NEXT: # %bb.3:
+; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: .LBB13_4:
+; X64-NEXT: callq __truncsfhf2 at PLT
+; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: jb .LBB13_6
+; X64-NEXT: # %bb.5:
+; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: .LBB13_6:
+; X64-NEXT: callq __truncsfhf2 at PLT
+; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: ucomiss %xmm0, %xmm0
+; X64-NEXT: jp .LBB13_7
+; X64-NEXT: # %bb.8:
+; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: jmp .LBB13_9
+; X64-NEXT: .LBB13_7:
+; X64-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64-NEXT: .LBB13_9:
+; X64-NEXT: callq __truncsfhf2 at PLT
+; X64-NEXT: addq $24, %rsp
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+;
+; X64AVX-LABEL: minimumnum_half_nsz:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: subq $24, %rsp
+; X64AVX-NEXT: .cfi_def_cfa_offset 32
+; X64AVX-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vucomiss %xmm0, %xmm0
+; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: jp .LBB13_2
+; X64AVX-NEXT: # %bb.1:
+; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: .LBB13_2:
+; X64AVX-NEXT: callq __truncsfhf2 at PLT
+; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vucomiss %xmm0, %xmm0
+; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: jp .LBB13_4
+; X64AVX-NEXT: # %bb.3:
+; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: .LBB13_4:
+; X64AVX-NEXT: callq __truncsfhf2 at PLT
+; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: jb .LBB13_6
+; X64AVX-NEXT: # %bb.5:
+; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: .LBB13_6:
+; X64AVX-NEXT: callq __truncsfhf2 at PLT
+; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vucomiss %xmm0, %xmm0
+; X64AVX-NEXT: jp .LBB13_7
+; X64AVX-NEXT: # %bb.8:
+; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: jmp .LBB13_9
+; X64AVX-NEXT: .LBB13_7:
+; X64AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64AVX-NEXT: .LBB13_9:
+; X64AVX-NEXT: callq __truncsfhf2 at PLT
+; X64AVX-NEXT: addq $24, %rsp
+; X64AVX-NEXT: .cfi_def_cfa_offset 8
+; X64AVX-NEXT: retq
+;
+; X64AVX512FP16-LABEL: minimumnum_half_nsz:
+; X64AVX512FP16: # %bb.0:
+; X64AVX512FP16-NEXT: vcmpunordsh %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordsh %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1}
+; X64AVX512FP16-NEXT: vcmpltsh %xmm1, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordsh %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovsh {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X64AVX512FP16-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1}
+; X64AVX512FP16-NEXT: vmovaps %xmm1, %xmm0
+; X64AVX512FP16-NEXT: retq
+ %z = call nsz half @llvm.minimumnum.f16(half %x, half %y)
+ ret half %z
+}
+
+define half @minimumnum_half_nnan(half %x, half %y) {
+; X64-LABEL: minimumnum_half_nnan:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbp
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: pushq %r14
+; X64-NEXT: .cfi_def_cfa_offset 24
+; X64-NEXT: pushq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 32
+; X64-NEXT: subq $48, %rsp
+; X64-NEXT: .cfi_def_cfa_offset 80
+; X64-NEXT: .cfi_offset %rbx, -32
+; X64-NEXT: .cfi_offset %r14, -24
+; X64-NEXT: .cfi_offset %rbp, -16
+; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT: pextrw $0, %xmm1, %ebx
+; X64-NEXT: pextrw $0, %xmm0, %ebp
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: jb .LBB14_2
+; X64-NEXT: # %bb.1:
+; X64-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: .LBB14_2:
+; X64-NEXT: callq __truncsfhf2 at PLT
+; X64-NEXT: pextrw $0, %xmm0, %r14d
+; X64-NEXT: movzwl %bp, %eax
+; X64-NEXT: cmpl $32768, %eax # imm = 0x8000
+; X64-NEXT: movl %r14d, %eax
+; X64-NEXT: cmovel %ebp, %eax
+; X64-NEXT: movzwl %bx, %ecx
+; X64-NEXT: cmpl $32768, %ecx # imm = 0x8000
+; X64-NEXT: cmovnel %eax, %ebx
+; X64-NEXT: callq __extendhfsf2 at PLT
+; X64-NEXT: pxor %xmm1, %xmm1
+; X64-NEXT: ucomiss %xmm1, %xmm0
+; X64-NEXT: cmovnel %r14d, %ebx
+; X64-NEXT: pinsrw $0, %ebx, %xmm0
+; X64-NEXT: addq $48, %rsp
+; X64-NEXT: .cfi_def_cfa_offset 32
+; X64-NEXT: popq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 24
+; X64-NEXT: popq %r14
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: popq %rbp
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+;
+; X64AVX-LABEL: minimumnum_half_nnan:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: pushq %rbp
+; X64AVX-NEXT: .cfi_def_cfa_offset 16
+; X64AVX-NEXT: pushq %r14
+; X64AVX-NEXT: .cfi_def_cfa_offset 24
+; X64AVX-NEXT: pushq %rbx
+; X64AVX-NEXT: .cfi_def_cfa_offset 32
+; X64AVX-NEXT: subq $48, %rsp
+; X64AVX-NEXT: .cfi_def_cfa_offset 80
+; X64AVX-NEXT: .cfi_offset %rbx, -32
+; X64AVX-NEXT: .cfi_offset %r14, -24
+; X64AVX-NEXT: .cfi_offset %rbp, -16
+; X64AVX-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64AVX-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64AVX-NEXT: vpextrw $0, %xmm1, %ebx
+; X64AVX-NEXT: vpextrw $0, %xmm0, %ebp
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; X64AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64AVX-NEXT: vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: jb .LBB14_2
+; X64AVX-NEXT: # %bb.1:
+; X64AVX-NEXT: vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64AVX-NEXT: .LBB14_2:
+; X64AVX-NEXT: callq __truncsfhf2 at PLT
+; X64AVX-NEXT: vpextrw $0, %xmm0, %r14d
+; X64AVX-NEXT: movzwl %bp, %eax
+; X64AVX-NEXT: cmpl $32768, %eax # imm = 0x8000
+; X64AVX-NEXT: movl %r14d, %eax
+; X64AVX-NEXT: cmovel %ebp, %eax
+; X64AVX-NEXT: movzwl %bx, %ecx
+; X64AVX-NEXT: cmpl $32768, %ecx # imm = 0x8000
+; X64AVX-NEXT: cmovnel %eax, %ebx
+; X64AVX-NEXT: callq __extendhfsf2 at PLT
+; X64AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64AVX-NEXT: vucomiss %xmm1, %xmm0
+; X64AVX-NEXT: cmovnel %r14d, %ebx
+; X64AVX-NEXT: vpinsrw $0, %ebx, %xmm0, %xmm0
+; X64AVX-NEXT: addq $48, %rsp
+; X64AVX-NEXT: .cfi_def_cfa_offset 32
+; X64AVX-NEXT: popq %rbx
+; X64AVX-NEXT: .cfi_def_cfa_offset 24
+; X64AVX-NEXT: popq %r14
+; X64AVX-NEXT: .cfi_def_cfa_offset 16
+; X64AVX-NEXT: popq %rbp
+; X64AVX-NEXT: .cfi_def_cfa_offset 8
+; X64AVX-NEXT: retq
+;
+; X64AVX512FP16-LABEL: minimumnum_half_nnan:
+; X64AVX512FP16: # %bb.0:
+; X64AVX512FP16-NEXT: vfpclasssh $5, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovaps %xmm1, %xmm2
+; X64AVX512FP16-NEXT: vmovsh %xmm0, %xmm0, %xmm2 {%k1}
+; X64AVX512FP16-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT: vminsh %xmm2, %xmm0, %xmm0
+; X64AVX512FP16-NEXT: retq
+ %z = call nnan half @llvm.minimumnum.f16(half %x, half %y)
+ ret half %z
+}
+