[llvm] SelectionDAG: Use qNaN constant if FCANONICALIZE not LegalOrCustom (PR #104564)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 16 01:56:46 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-llvm-selectiondag
@llvm/pr-subscribers-backend-x86
Author: YunQiang Su (wzssyqa)
The default action for ISD::FCANONICALIZE is Legal, but in fact most architectures, X86 included, do not implement it.
Let's set the action for ISD::FCANONICALIZE to Expand on X86, so that expandFMINIMUMNUM_FMAXIMUMNUM can determine whether the node is LegalOrCustom and fall back to a qNaN constant when it is not.
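
For reference, here is a minimal standalone sketch (not part of the patch) of what the fallback materializes: APFloat::getQNaN yields the type's default quiet NaN, e.g. 0x7FC00000 for f32.

```cpp
// Sketch only, not part of the patch: print the bit pattern of the default
// quiet NaN that the fallback path materializes for an IEEE-754 f32.
#include "llvm/ADT/APFloat.h"
#include <cstdio>

int main() {
  llvm::APFloat QNaN = llvm::APFloat::getQNaN(llvm::APFloat::IEEEsingle());
  // Expected output: 7fc00000 -- sign 0, all-ones exponent, and the
  // mantissa MSB (the quiet bit under IEEE-754 2008 encoding) set.
  std::printf("%08llx\n",
              (unsigned long long)QNaN.bitcastToAPInt().getZExtValue());
  return 0;
}
```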
---
Patch is 57.58 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/104564.diff
3 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+10-2)
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+3)
- (added) llvm/test/CodeGen/X86/fp-maximumnum-minimumnum.ll (+1363)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 2c939967a5e1d9..7ee28d08d556c3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8622,8 +8622,16 @@ SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
// If MinMax is NaN, let's quiet it.
if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS) &&
!DAG.isKnownNeverNaN(RHS)) {
- SDValue MinMaxQuiet =
- DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
+ SDValue MinMaxQuiet;
+ if (isOperationLegalOrCustom(ISD::FCANONICALIZE, VT)) {
+ MinMaxQuiet = DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
+ } else {
+ // MIPS pre-R5 and HPPA use a different encoding for qNaN and sNaN.
+ // ISD::FCANONICALIZE is supported by MIPS.
+ // HPPA is not supported by LLVM yet.
+ MinMaxQuiet =
+ DAG.getConstantFP(APFloat::getQNaN(VT.getFltSemantics()), DL, VT);
+ }
MinMax =
DAG.getSelectCC(DL, MinMax, MinMax, MinMaxQuiet, MinMax, ISD::SETUO);
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 11c9a992cbdee9..e929d98e959715 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -608,6 +608,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMAXNUM, VT, Action);
setOperationAction(ISD::FMINIMUM, VT, Action);
setOperationAction(ISD::FMAXIMUM, VT, Action);
+ setOperationAction(ISD::FCANONICALIZE, VT, Action);
setOperationAction(ISD::FSIN, VT, Action);
setOperationAction(ISD::FCOS, VT, Action);
setOperationAction(ISD::FSINCOS, VT, Action);
@@ -668,6 +669,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSIN , VT, Expand);
setOperationAction(ISD::FCOS , VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
+
+ setOperationAction(ISD::FCANONICALIZE, VT, Expand);
}
// Half type will be promoted by default.
diff --git a/llvm/test/CodeGen/X86/fp-maximumnum-minimumnum.ll b/llvm/test/CodeGen/X86/fp-maximumnum-minimumnum.ll
new file mode 100644
index 00000000000000..6dd7e582fae0b5
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fp-maximumnum-minimumnum.ll
@@ -0,0 +1,1363 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=x86_64 < %s | FileCheck %s --check-prefix=X64
+; RUN: llc --mtriple=x86_64 --mattr=+avx < %s | FileCheck %s --check-prefix=X64AVX
+; RUN: llc --mtriple=x86_64 --mattr=+avx512fp16 < %s | FileCheck %s --check-prefix=X64AVX512FP16
+
+declare float @llvm.maximumnum.f32(float, float)
+declare double @llvm.maximumnum.f64(double, double)
+declare float @llvm.minimumnum.f32(float, float)
+declare double @llvm.minimumnum.f64(double, double)
+
+define float @maximumnum_float(float %x, float %y) {
+; X64-LABEL: maximumnum_float:
+; X64: # %bb.0:
+; X64-NEXT: movaps %xmm0, %xmm2
+; X64-NEXT: cmpunordss %xmm0, %xmm2
+; X64-NEXT: movaps %xmm2, %xmm3
+; X64-NEXT: andps %xmm1, %xmm3
+; X64-NEXT: andnps %xmm0, %xmm2
+; X64-NEXT: orps %xmm3, %xmm2
+; X64-NEXT: movaps %xmm1, %xmm3
+; X64-NEXT: cmpunordss %xmm1, %xmm3
+; X64-NEXT: movaps %xmm3, %xmm0
+; X64-NEXT: andps %xmm2, %xmm0
+; X64-NEXT: andnps %xmm1, %xmm3
+; X64-NEXT: orps %xmm0, %xmm3
+; X64-NEXT: movaps %xmm3, %xmm0
+; X64-NEXT: cmpltss %xmm2, %xmm0
+; X64-NEXT: movaps %xmm0, %xmm1
+; X64-NEXT: andps %xmm2, %xmm1
+; X64-NEXT: andnps %xmm3, %xmm0
+; X64-NEXT: orps %xmm1, %xmm0
+; X64-NEXT: movaps %xmm0, %xmm1
+; X64-NEXT: cmpunordss %xmm0, %xmm1
+; X64-NEXT: movss {{.*#+}} xmm4 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64-NEXT: andps %xmm1, %xmm4
+; X64-NEXT: andnps %xmm0, %xmm1
+; X64-NEXT: orps %xmm1, %xmm4
+; X64-NEXT: xorps %xmm1, %xmm1
+; X64-NEXT: cmpeqss %xmm4, %xmm1
+; X64-NEXT: movd %xmm2, %eax
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: je .LBB0_2
+; X64-NEXT: # %bb.1:
+; X64-NEXT: movaps %xmm4, %xmm2
+; X64-NEXT: .LBB0_2:
+; X64-NEXT: movaps %xmm1, %xmm0
+; X64-NEXT: andnps %xmm4, %xmm0
+; X64-NEXT: movd %xmm3, %eax
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: je .LBB0_4
+; X64-NEXT: # %bb.3:
+; X64-NEXT: movaps %xmm2, %xmm3
+; X64-NEXT: .LBB0_4:
+; X64-NEXT: andps %xmm3, %xmm1
+; X64-NEXT: orps %xmm1, %xmm0
+; X64-NEXT: retq
+;
+; X64AVX-LABEL: maximumnum_float:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm2
+; X64AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm0
+; X64AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; X64AVX-NEXT: vcmpltss %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm3
+; X64AVX-NEXT: vblendvps %xmm3, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; X64AVX-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; X64AVX-NEXT: vmovd %xmm2, %eax
+; X64AVX-NEXT: testl %eax, %eax
+; X64AVX-NEXT: je .LBB0_2
+; X64AVX-NEXT: # %bb.1:
+; X64AVX-NEXT: vmovaps %xmm1, %xmm2
+; X64AVX-NEXT: .LBB0_2:
+; X64AVX-NEXT: vcmpeqss %xmm3, %xmm1, %xmm3
+; X64AVX-NEXT: vmovd %xmm0, %eax
+; X64AVX-NEXT: testl %eax, %eax
+; X64AVX-NEXT: je .LBB0_4
+; X64AVX-NEXT: # %bb.3:
+; X64AVX-NEXT: vmovaps %xmm2, %xmm0
+; X64AVX-NEXT: .LBB0_4:
+; X64AVX-NEXT: vblendvps %xmm3, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT: retq
+;
+; X64AVX512FP16-LABEL: maximumnum_float:
+; X64AVX512FP16: # %bb.0:
+; X64AVX512FP16-NEXT: vcmpunordss %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordss %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT: vcmpltss %xmm0, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovaps %xmm1, %xmm2
+; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordss %xmm2, %xmm2, %k1
+; X64AVX512FP16-NEXT: vmovss {{.*#+}} xmm2 {%k1} = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64AVX512FP16-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; X64AVX512FP16-NEXT: vcmpeqss %xmm3, %xmm2, %k1
+; X64AVX512FP16-NEXT: vmovd %xmm0, %eax
+; X64AVX512FP16-NEXT: testl %eax, %eax
+; X64AVX512FP16-NEXT: sete %al
+; X64AVX512FP16-NEXT: kmovd %eax, %k2
+; X64AVX512FP16-NEXT: vmovaps %xmm2, %xmm3
+; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT: vmovd %xmm1, %eax
+; X64AVX512FP16-NEXT: testl %eax, %eax
+; X64AVX512FP16-NEXT: sete %al
+; X64AVX512FP16-NEXT: kmovd %eax, %k2
+; X64AVX512FP16-NEXT: vmovss %xmm1, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT: vmovss %xmm3, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT: vmovaps %xmm2, %xmm0
+; X64AVX512FP16-NEXT: retq
+ %z = call float @llvm.maximumnum.f32(float %x, float %y)
+ ret float %z
+}
+
+define float @maximumnum_float_nsz(float %x, float %y) {
+; X64-LABEL: maximumnum_float_nsz:
+; X64: # %bb.0:
+; X64-NEXT: movaps %xmm0, %xmm2
+; X64-NEXT: cmpunordss %xmm0, %xmm2
+; X64-NEXT: movaps %xmm2, %xmm3
+; X64-NEXT: andps %xmm1, %xmm3
+; X64-NEXT: andnps %xmm0, %xmm2
+; X64-NEXT: orps %xmm3, %xmm2
+; X64-NEXT: movaps %xmm1, %xmm0
+; X64-NEXT: cmpunordss %xmm1, %xmm0
+; X64-NEXT: movaps %xmm0, %xmm3
+; X64-NEXT: andps %xmm2, %xmm3
+; X64-NEXT: andnps %xmm1, %xmm0
+; X64-NEXT: orps %xmm3, %xmm0
+; X64-NEXT: movaps %xmm0, %xmm1
+; X64-NEXT: cmpltss %xmm2, %xmm1
+; X64-NEXT: andps %xmm1, %xmm2
+; X64-NEXT: andnps %xmm0, %xmm1
+; X64-NEXT: orps %xmm2, %xmm1
+; X64-NEXT: movaps %xmm1, %xmm2
+; X64-NEXT: cmpunordss %xmm1, %xmm2
+; X64-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64-NEXT: andps %xmm2, %xmm0
+; X64-NEXT: andnps %xmm1, %xmm2
+; X64-NEXT: orps %xmm2, %xmm0
+; X64-NEXT: retq
+;
+; X64AVX-LABEL: maximumnum_float_nsz:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
+; X64AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2
+; X64AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm1
+; X64AVX-NEXT: vcmpltss %xmm0, %xmm1, %xmm2
+; X64AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm1
+; X64AVX-NEXT: vblendvps %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64AVX-NEXT: retq
+;
+; X64AVX512FP16-LABEL: maximumnum_float_nsz:
+; X64AVX512FP16: # %bb.0:
+; X64AVX512FP16-NEXT: vcmpunordss %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordss %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT: vcmpltss %xmm0, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordss %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovss {{.*#+}} xmm1 {%k1} = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64AVX512FP16-NEXT: vmovaps %xmm1, %xmm0
+; X64AVX512FP16-NEXT: retq
+ %z = call nsz float @llvm.maximumnum.f32(float %x, float %y)
+ ret float %z
+}
+
+define float @maximumnum_float_nnan(float %x, float %y) {
+; X64-LABEL: maximumnum_float_nnan:
+; X64: # %bb.0:
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: js .LBB2_1
+; X64-NEXT: # %bb.2:
+; X64-NEXT: movdqa %xmm0, %xmm2
+; X64-NEXT: jmp .LBB2_3
+; X64-NEXT: .LBB2_1:
+; X64-NEXT: movdqa %xmm1, %xmm2
+; X64-NEXT: movdqa %xmm0, %xmm1
+; X64-NEXT: .LBB2_3:
+; X64-NEXT: maxss %xmm2, %xmm1
+; X64-NEXT: movaps %xmm1, %xmm0
+; X64-NEXT: retq
+;
+; X64AVX-LABEL: maximumnum_float_nnan:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: vmovd %xmm0, %eax
+; X64AVX-NEXT: testl %eax, %eax
+; X64AVX-NEXT: js .LBB2_1
+; X64AVX-NEXT: # %bb.2:
+; X64AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT: retq
+; X64AVX-NEXT: .LBB2_1:
+; X64AVX-NEXT: vmovaps %xmm1, %xmm2
+; X64AVX-NEXT: vmaxss %xmm2, %xmm0, %xmm0
+; X64AVX-NEXT: retq
+;
+; X64AVX512FP16-LABEL: maximumnum_float_nnan:
+; X64AVX512FP16: # %bb.0:
+; X64AVX512FP16-NEXT: vfpclassss $3, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovaps %xmm1, %xmm2
+; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT: vmaxss %xmm2, %xmm0, %xmm0
+; X64AVX512FP16-NEXT: retq
+ %z = call nnan float @llvm.maximumnum.f32(float %x, float %y)
+ ret float %z
+}
+
+
+define double @maximumnum_double(double %x, double %y) {
+; X64-LABEL: maximumnum_double:
+; X64: # %bb.0:
+; X64-NEXT: movapd %xmm0, %xmm2
+; X64-NEXT: cmpunordsd %xmm0, %xmm2
+; X64-NEXT: movapd %xmm2, %xmm3
+; X64-NEXT: andpd %xmm1, %xmm3
+; X64-NEXT: andnpd %xmm0, %xmm2
+; X64-NEXT: orpd %xmm3, %xmm2
+; X64-NEXT: movapd %xmm1, %xmm3
+; X64-NEXT: cmpunordsd %xmm1, %xmm3
+; X64-NEXT: movapd %xmm3, %xmm0
+; X64-NEXT: andpd %xmm2, %xmm0
+; X64-NEXT: andnpd %xmm1, %xmm3
+; X64-NEXT: orpd %xmm0, %xmm3
+; X64-NEXT: movapd %xmm3, %xmm0
+; X64-NEXT: cmpltsd %xmm2, %xmm0
+; X64-NEXT: movapd %xmm0, %xmm1
+; X64-NEXT: andpd %xmm2, %xmm1
+; X64-NEXT: andnpd %xmm3, %xmm0
+; X64-NEXT: orpd %xmm1, %xmm0
+; X64-NEXT: movapd %xmm0, %xmm1
+; X64-NEXT: cmpunordsd %xmm0, %xmm1
+; X64-NEXT: movsd {{.*#+}} xmm4 = [NaN,0.0E+0]
+; X64-NEXT: andpd %xmm1, %xmm4
+; X64-NEXT: andnpd %xmm0, %xmm1
+; X64-NEXT: orpd %xmm1, %xmm4
+; X64-NEXT: xorpd %xmm1, %xmm1
+; X64-NEXT: cmpeqsd %xmm4, %xmm1
+; X64-NEXT: movq %xmm2, %rax
+; X64-NEXT: testq %rax, %rax
+; X64-NEXT: je .LBB3_2
+; X64-NEXT: # %bb.1:
+; X64-NEXT: movapd %xmm4, %xmm2
+; X64-NEXT: .LBB3_2:
+; X64-NEXT: movapd %xmm1, %xmm0
+; X64-NEXT: andnpd %xmm4, %xmm0
+; X64-NEXT: movq %xmm3, %rax
+; X64-NEXT: testq %rax, %rax
+; X64-NEXT: je .LBB3_4
+; X64-NEXT: # %bb.3:
+; X64-NEXT: movapd %xmm2, %xmm3
+; X64-NEXT: .LBB3_4:
+; X64-NEXT: andpd %xmm3, %xmm1
+; X64-NEXT: orpd %xmm1, %xmm0
+; X64-NEXT: retq
+;
+; X64AVX-LABEL: maximumnum_double:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm2
+; X64AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm0
+; X64AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
+; X64AVX-NEXT: vcmpltsd %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT: vblendvpd %xmm1, %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm3
+; X64AVX-NEXT: vblendvpd %xmm3, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; X64AVX-NEXT: vxorpd %xmm3, %xmm3, %xmm3
+; X64AVX-NEXT: vmovq %xmm2, %rax
+; X64AVX-NEXT: testq %rax, %rax
+; X64AVX-NEXT: je .LBB3_2
+; X64AVX-NEXT: # %bb.1:
+; X64AVX-NEXT: vmovapd %xmm1, %xmm2
+; X64AVX-NEXT: .LBB3_2:
+; X64AVX-NEXT: vcmpeqsd %xmm3, %xmm1, %xmm3
+; X64AVX-NEXT: vmovq %xmm0, %rax
+; X64AVX-NEXT: testq %rax, %rax
+; X64AVX-NEXT: je .LBB3_4
+; X64AVX-NEXT: # %bb.3:
+; X64AVX-NEXT: vmovapd %xmm2, %xmm0
+; X64AVX-NEXT: .LBB3_4:
+; X64AVX-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT: retq
+;
+; X64AVX512FP16-LABEL: maximumnum_double:
+; X64AVX512FP16: # %bb.0:
+; X64AVX512FP16-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordsd %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT: vcmpltsd %xmm0, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovapd %xmm1, %xmm2
+; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordsd %xmm2, %xmm2, %k1
+; X64AVX512FP16-NEXT: vmovsd {{.*#+}} xmm2 {%k1} = [NaN,0.0E+0]
+; X64AVX512FP16-NEXT: vxorpd %xmm3, %xmm3, %xmm3
+; X64AVX512FP16-NEXT: vcmpeqsd %xmm3, %xmm2, %k1
+; X64AVX512FP16-NEXT: vmovq %xmm0, %rax
+; X64AVX512FP16-NEXT: testq %rax, %rax
+; X64AVX512FP16-NEXT: sete %al
+; X64AVX512FP16-NEXT: kmovd %eax, %k2
+; X64AVX512FP16-NEXT: vmovapd %xmm2, %xmm3
+; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT: vmovq %xmm1, %rax
+; X64AVX512FP16-NEXT: testq %rax, %rax
+; X64AVX512FP16-NEXT: sete %al
+; X64AVX512FP16-NEXT: kmovd %eax, %k2
+; X64AVX512FP16-NEXT: vmovsd %xmm1, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT: vmovsd %xmm3, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT: vmovapd %xmm2, %xmm0
+; X64AVX512FP16-NEXT: retq
+ %z = call double @llvm.maximumnum.f64(double %x, double %y)
+ ret double %z
+}
+
+define double @maximumnum_double_nsz(double %x, double %y) {
+; X64-LABEL: maximumnum_double_nsz:
+; X64: # %bb.0:
+; X64-NEXT: movapd %xmm0, %xmm2
+; X64-NEXT: cmpunordsd %xmm0, %xmm2
+; X64-NEXT: movapd %xmm2, %xmm3
+; X64-NEXT: andpd %xmm1, %xmm3
+; X64-NEXT: andnpd %xmm0, %xmm2
+; X64-NEXT: orpd %xmm3, %xmm2
+; X64-NEXT: movapd %xmm1, %xmm0
+; X64-NEXT: cmpunordsd %xmm1, %xmm0
+; X64-NEXT: movapd %xmm0, %xmm3
+; X64-NEXT: andpd %xmm2, %xmm3
+; X64-NEXT: andnpd %xmm1, %xmm0
+; X64-NEXT: orpd %xmm3, %xmm0
+; X64-NEXT: movapd %xmm0, %xmm1
+; X64-NEXT: cmpltsd %xmm2, %xmm1
+; X64-NEXT: andpd %xmm1, %xmm2
+; X64-NEXT: andnpd %xmm0, %xmm1
+; X64-NEXT: orpd %xmm2, %xmm1
+; X64-NEXT: movapd %xmm1, %xmm2
+; X64-NEXT: cmpunordsd %xmm1, %xmm2
+; X64-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
+; X64-NEXT: andpd %xmm2, %xmm0
+; X64-NEXT: andnpd %xmm1, %xmm2
+; X64-NEXT: orpd %xmm2, %xmm0
+; X64-NEXT: retq
+;
+; X64AVX-LABEL: maximumnum_double_nsz:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; X64AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm2
+; X64AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
+; X64AVX-NEXT: vcmpltsd %xmm0, %xmm1, %xmm2
+; X64AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm1
+; X64AVX-NEXT: vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64AVX-NEXT: retq
+;
+; X64AVX512FP16-LABEL: maximumnum_double_nsz:
+; X64AVX512FP16: # %bb.0:
+; X64AVX512FP16-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordsd %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT: vcmpltsd %xmm0, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT: vcmpunordsd %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT: vmovsd {{.*#+}} xmm1 {%k1} = [NaN,0.0E+0]
+; X64AVX512FP16-NEXT: vmovapd %xmm1, %xmm0
+; X64AVX512FP16-NEXT: retq
+ %z = call nsz double @llvm.maximumnum.f64(double %x, double %y)
+ ret double %z
+}
+
+define double @maximumnum_double_nnan(double %x, double %y) {
+; X64-LABEL: maximumnum_double_nnan:
+; X64: # %bb.0:
+; X64-NEXT: movq %xmm0, %rax
+; X64-NEXT: testq %rax, %rax
+; X64-NEXT: js .LBB5_1
+; X64-NEXT: # %bb.2:
+; X64-NEXT: movdqa %xmm0, %xmm2
+; X64-NEXT: jmp .LBB5_3
+; X64-NEXT: .LBB5_1:
+; X64-NEXT: movdqa %xmm1, %xmm2
+; X64-NEXT: movdqa %xmm0, %xmm1
+; X64-NEXT: .LBB5_3:
+; X64-NEXT: maxsd %xmm2, %xmm1
+; X64-NEXT: movapd %xmm1, %xmm0
+; X64-NEXT: retq
+;
+; X64AVX-LABEL: maximumnum_double_nnan:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: vmovq %xmm0, %rax
+; X64AVX-NEXT: testq %rax, %rax
+; X64AVX-NEXT: js .LBB5_1
+; X64AVX-NEXT: # %bb.2:
+; X64AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT: retq
+; X64AVX-NEXT: .LBB5_1:
+; X64AVX-NEXT: vmovapd %xmm1, %xmm2
+; X64AVX-NEXT: vmaxsd %xmm2, %xmm0, %xmm0
+; X64AVX-NEXT: retq
+;
+; X64AVX512FP16-LABEL: maximumnum_double_nnan:
+; X64AVX512FP16: # %bb.0:
+; X64AVX512FP16-NEXT: vfpclasssd $3, %xmm0, %k1
+; X64AVX512FP16-NEXT: vmovapd %xmm1, %xmm2
+; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT: vmaxsd %xmm2, %xmm0, %xmm0
+; X64AVX512FP16-NEXT: retq
+ %z = call nnan double @llvm.maximumnum.f64(double %x, double %y)
+ ret double %z
+}
+
+define float @minimumnum_float(float %x, float %y) {
+; X64-LABEL: minimumnum_float:
+; X64: # %bb.0:
+; X64-NEXT: movaps %xmm0, %xmm2
+; X64-NEXT: cmpunordss %xmm0, %xmm2
+; X64-NEXT: movaps %xmm2, %xmm3
+; X64-NEXT: andps %xmm1, %xmm3
+; X64-NEXT: andnps %xmm0, %xmm2
+; X64-NEXT: orps %xmm3, %xmm2
+; X64-NEXT: movaps %xmm1, %xmm3
+; X64-NEXT: cmpunordss %xmm1, %xmm3
+; X64-NEXT: movaps %xmm3, %xmm0
+; X64-NEXT: andps %xmm2, %xmm0
+; X64-NEXT: andnps %xmm1, %xmm3
+; X64-NEXT: orps %xmm0, %xmm3
+; X64-NEXT: movaps %xmm2, %xmm0
+; X64-NEXT: cmpltss %xmm3, %xmm0
+; X64-NEXT: movaps %xmm0, %xmm1
+; X64-NEXT: andps %xmm2, %xmm1
+; X64-NEXT: andnps %xmm3, %xmm0
+; X64-NEXT: orps %xmm1, %xmm0
+; X64-NEXT: movaps %xmm0, %xmm1
+; X64-NEXT: cmpunordss %xmm0, %xmm1
+; X64-NEXT: movss {{.*#+}} xmm4 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64-NEXT: andps %xmm1, %xmm4
+; X64-NEXT: andnps %xmm0, %xmm1
+; X64-NEXT: orps %xmm1, %xmm4
+; X64-NEXT: xorps %xmm1, %xmm1
+; X64-NEXT: cmpeqss %xmm4, %xmm1
+; X64-NEXT: movd %xmm2, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: jo .LBB6_2
+; X64-NEXT: # %bb.1:
+; X64-NEXT: movaps %xmm4, %xmm2
+; X64-NEXT: .LBB6_2:
+; X64-NEXT: movaps %xmm1, %xmm0
+; X64-NEXT: andnps %xmm4, %xmm0
+; X64-NEXT: movd %xmm3, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: jo .LBB6_4
+; X64-NEXT: # %bb.3:
+; X64-NEXT: movaps %xmm2, %xmm3
+; X64-NEXT: .LBB6_4:
+; X64-NEXT: andps %xmm3, %xmm1
+; X64-NEXT: orps %xmm1, %xmm0
+; X64-NEXT: retq
+;
+; X64AVX-LABEL: minimumnum_float:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm2
+; X64AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm0
+; X64AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; X64AVX-NEXT: vcmpltss %xmm0, %xmm2, %xmm1
+; X64AVX-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm3
+; X64AVX-NEXT: vblendvps %xmm3, {{\.?LCPI[0-9]+_[0-9]+...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/104564
More information about the llvm-commits mailing list