[llvm] SelectionDAG: Use qNaN constant if FCANONICALIZE not LegalOrCustom (PR #104564)

via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 16 01:56:46 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag

@llvm/pr-subscribers-backend-x86

Author: YunQiang Su (wzssyqa)

<details>
<summary>Changes</summary>

The default Action of ISD::FCANONICALIZE is Legal, but in fact it is not defined on most architectures, including X86.

Let's set the Action of ISD::FCANONICALIZE to Expand on X86, so that expandFMINIMUMNUM_FMAXIMUMNUM can correctly determine whether the operation is LegalOrCustom.

---

Patch is 57.58 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/104564.diff


3 Files Affected:

- (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+10-2) 
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+3) 
- (added) llvm/test/CodeGen/X86/fp-maximumnum-minimumnum.ll (+1363) 


``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 2c939967a5e1d9..7ee28d08d556c3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8622,8 +8622,16 @@ SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
   // If MinMax is NaN, let's quiet it.
   if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS) &&
       !DAG.isKnownNeverNaN(RHS)) {
-    SDValue MinMaxQuiet =
-        DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
+    SDValue MinMaxQuiet;
+    if (isOperationLegalOrCustom(ISD::FCANONICALIZE, VT)) {
+      MinMaxQuiet = DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
+    } else {
+      // MIPS pre-R5 and HPPA use different encoding of qNaN and sNaN.
+      // ISD::FCANONICALIZE is supported by MIPS.
+      // HPPA is not supported by LLVM yet.
+      MinMaxQuiet =
+          DAG.getConstantFP(APFloat::getQNaN(VT.getFltSemantics()), DL, VT);
+    }
     MinMax =
         DAG.getSelectCC(DL, MinMax, MinMax, MinMaxQuiet, MinMax, ISD::SETUO);
   }
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 11c9a992cbdee9..e929d98e959715 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -608,6 +608,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FMAXNUM, VT, Action);
     setOperationAction(ISD::FMINIMUM, VT, Action);
     setOperationAction(ISD::FMAXIMUM, VT, Action);
+    setOperationAction(ISD::FCANONICALIZE, VT, Action);
     setOperationAction(ISD::FSIN, VT, Action);
     setOperationAction(ISD::FCOS, VT, Action);
     setOperationAction(ISD::FSINCOS, VT, Action);
@@ -668,6 +669,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::FSIN   , VT, Expand);
       setOperationAction(ISD::FCOS   , VT, Expand);
       setOperationAction(ISD::FSINCOS, VT, Expand);
+
+      setOperationAction(ISD::FCANONICALIZE, VT, Expand);
     }
 
     // Half type will be promoted by default.
diff --git a/llvm/test/CodeGen/X86/fp-maximumnum-minimumnum.ll b/llvm/test/CodeGen/X86/fp-maximumnum-minimumnum.ll
new file mode 100644
index 00000000000000..6dd7e582fae0b5
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fp-maximumnum-minimumnum.ll
@@ -0,0 +1,1363 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=x86_64 < %s | FileCheck %s --check-prefix=X64
+; RUN: llc --mtriple=x86_64 --mattr=+avx < %s | FileCheck %s --check-prefix=X64AVX
+; RUN: llc --mtriple=x86_64 --mattr=+avx512fp16 < %s | FileCheck %s --check-prefix=X64AVX512FP16
+
+declare float @llvm.maximumnum.f32(float, float)
+declare double @llvm.maximumnum.f64(double, double)
+declare float @llvm.minimumnum.f32(float, float)
+declare double @llvm.minimumnum.f64(double, double)
+
+define float @maximumnum_float(float %x, float %y) {
+; X64-LABEL: maximumnum_float:
+; X64:       # %bb.0:
+; X64-NEXT:    movaps %xmm0, %xmm2
+; X64-NEXT:    cmpunordss %xmm0, %xmm2
+; X64-NEXT:    movaps %xmm2, %xmm3
+; X64-NEXT:    andps %xmm1, %xmm3
+; X64-NEXT:    andnps %xmm0, %xmm2
+; X64-NEXT:    orps %xmm3, %xmm2
+; X64-NEXT:    movaps %xmm1, %xmm3
+; X64-NEXT:    cmpunordss %xmm1, %xmm3
+; X64-NEXT:    movaps %xmm3, %xmm0
+; X64-NEXT:    andps %xmm2, %xmm0
+; X64-NEXT:    andnps %xmm1, %xmm3
+; X64-NEXT:    orps %xmm0, %xmm3
+; X64-NEXT:    movaps %xmm3, %xmm0
+; X64-NEXT:    cmpltss %xmm2, %xmm0
+; X64-NEXT:    movaps %xmm0, %xmm1
+; X64-NEXT:    andps %xmm2, %xmm1
+; X64-NEXT:    andnps %xmm3, %xmm0
+; X64-NEXT:    orps %xmm1, %xmm0
+; X64-NEXT:    movaps %xmm0, %xmm1
+; X64-NEXT:    cmpunordss %xmm0, %xmm1
+; X64-NEXT:    movss {{.*#+}} xmm4 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64-NEXT:    andps %xmm1, %xmm4
+; X64-NEXT:    andnps %xmm0, %xmm1
+; X64-NEXT:    orps %xmm1, %xmm4
+; X64-NEXT:    xorps %xmm1, %xmm1
+; X64-NEXT:    cmpeqss %xmm4, %xmm1
+; X64-NEXT:    movd %xmm2, %eax
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    je .LBB0_2
+; X64-NEXT:  # %bb.1:
+; X64-NEXT:    movaps %xmm4, %xmm2
+; X64-NEXT:  .LBB0_2:
+; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    andnps %xmm4, %xmm0
+; X64-NEXT:    movd %xmm3, %eax
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    je .LBB0_4
+; X64-NEXT:  # %bb.3:
+; X64-NEXT:    movaps %xmm2, %xmm3
+; X64-NEXT:  .LBB0_4:
+; X64-NEXT:    andps %xmm3, %xmm1
+; X64-NEXT:    orps %xmm1, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: maximumnum_float:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm2
+; X64AVX-NEXT:    vcmpunordss %xmm1, %xmm1, %xmm0
+; X64AVX-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; X64AVX-NEXT:    vcmpltss %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT:    vblendvps %xmm1, %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT:    vcmpunordss %xmm1, %xmm1, %xmm3
+; X64AVX-NEXT:    vblendvps %xmm3, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; X64AVX-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X64AVX-NEXT:    vmovd %xmm2, %eax
+; X64AVX-NEXT:    testl %eax, %eax
+; X64AVX-NEXT:    je .LBB0_2
+; X64AVX-NEXT:  # %bb.1:
+; X64AVX-NEXT:    vmovaps %xmm1, %xmm2
+; X64AVX-NEXT:  .LBB0_2:
+; X64AVX-NEXT:    vcmpeqss %xmm3, %xmm1, %xmm3
+; X64AVX-NEXT:    vmovd %xmm0, %eax
+; X64AVX-NEXT:    testl %eax, %eax
+; X64AVX-NEXT:    je .LBB0_4
+; X64AVX-NEXT:  # %bb.3:
+; X64AVX-NEXT:    vmovaps %xmm2, %xmm0
+; X64AVX-NEXT:  .LBB0_4:
+; X64AVX-NEXT:    vblendvps %xmm3, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: maximumnum_float:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vcmpunordss %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordss %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpltss %xmm0, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovaps %xmm1, %xmm2
+; X64AVX512FP16-NEXT:    vmovss %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordss %xmm2, %xmm2, %k1
+; X64AVX512FP16-NEXT:    vmovss {{.*#+}} xmm2 {%k1} = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64AVX512FP16-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X64AVX512FP16-NEXT:    vcmpeqss %xmm3, %xmm2, %k1
+; X64AVX512FP16-NEXT:    vmovd %xmm0, %eax
+; X64AVX512FP16-NEXT:    testl %eax, %eax
+; X64AVX512FP16-NEXT:    sete %al
+; X64AVX512FP16-NEXT:    kmovd %eax, %k2
+; X64AVX512FP16-NEXT:    vmovaps %xmm2, %xmm3
+; X64AVX512FP16-NEXT:    vmovss %xmm0, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT:    vmovd %xmm1, %eax
+; X64AVX512FP16-NEXT:    testl %eax, %eax
+; X64AVX512FP16-NEXT:    sete %al
+; X64AVX512FP16-NEXT:    kmovd %eax, %k2
+; X64AVX512FP16-NEXT:    vmovss %xmm1, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT:    vmovss %xmm3, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vmovaps %xmm2, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call float @llvm.maximumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @maximumnum_float_nsz(float %x, float %y) {
+; X64-LABEL: maximumnum_float_nsz:
+; X64:       # %bb.0:
+; X64-NEXT:    movaps %xmm0, %xmm2
+; X64-NEXT:    cmpunordss %xmm0, %xmm2
+; X64-NEXT:    movaps %xmm2, %xmm3
+; X64-NEXT:    andps %xmm1, %xmm3
+; X64-NEXT:    andnps %xmm0, %xmm2
+; X64-NEXT:    orps %xmm3, %xmm2
+; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    cmpunordss %xmm1, %xmm0
+; X64-NEXT:    movaps %xmm0, %xmm3
+; X64-NEXT:    andps %xmm2, %xmm3
+; X64-NEXT:    andnps %xmm1, %xmm0
+; X64-NEXT:    orps %xmm3, %xmm0
+; X64-NEXT:    movaps %xmm0, %xmm1
+; X64-NEXT:    cmpltss %xmm2, %xmm1
+; X64-NEXT:    andps %xmm1, %xmm2
+; X64-NEXT:    andnps %xmm0, %xmm1
+; X64-NEXT:    orps %xmm2, %xmm1
+; X64-NEXT:    movaps %xmm1, %xmm2
+; X64-NEXT:    cmpunordss %xmm1, %xmm2
+; X64-NEXT:    movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64-NEXT:    andps %xmm2, %xmm0
+; X64-NEXT:    andnps %xmm1, %xmm2
+; X64-NEXT:    orps %xmm2, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: maximumnum_float_nsz:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
+; X64AVX-NEXT:    vcmpunordss %xmm1, %xmm1, %xmm2
+; X64AVX-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm1
+; X64AVX-NEXT:    vcmpltss %xmm0, %xmm1, %xmm2
+; X64AVX-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm1
+; X64AVX-NEXT:    vblendvps %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: maximumnum_float_nsz:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vcmpunordss %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordss %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpltss %xmm0, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordss %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovss {{.*#+}} xmm1 {%k1} = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64AVX512FP16-NEXT:    vmovaps %xmm1, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call nsz float @llvm.maximumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @maximumnum_float_nnan(float %x, float %y) {
+; X64-LABEL: maximumnum_float_nnan:
+; X64:       # %bb.0:
+; X64-NEXT:    movd %xmm0, %eax
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    js .LBB2_1
+; X64-NEXT:  # %bb.2:
+; X64-NEXT:    movdqa %xmm0, %xmm2
+; X64-NEXT:    jmp .LBB2_3
+; X64-NEXT:  .LBB2_1:
+; X64-NEXT:    movdqa %xmm1, %xmm2
+; X64-NEXT:    movdqa %xmm0, %xmm1
+; X64-NEXT:  .LBB2_3:
+; X64-NEXT:    maxss %xmm2, %xmm1
+; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: maximumnum_float_nnan:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vmovd %xmm0, %eax
+; X64AVX-NEXT:    testl %eax, %eax
+; X64AVX-NEXT:    js .LBB2_1
+; X64AVX-NEXT:  # %bb.2:
+; X64AVX-NEXT:    vmaxss %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT:    retq
+; X64AVX-NEXT:  .LBB2_1:
+; X64AVX-NEXT:    vmovaps %xmm1, %xmm2
+; X64AVX-NEXT:    vmaxss %xmm2, %xmm0, %xmm0
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: maximumnum_float_nnan:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vfpclassss $3, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovaps %xmm1, %xmm2
+; X64AVX512FP16-NEXT:    vmovss %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vmaxss %xmm2, %xmm0, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call nnan float @llvm.maximumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+
+define double @maximumnum_double(double %x, double %y) {
+; X64-LABEL: maximumnum_double:
+; X64:       # %bb.0:
+; X64-NEXT:    movapd %xmm0, %xmm2
+; X64-NEXT:    cmpunordsd %xmm0, %xmm2
+; X64-NEXT:    movapd %xmm2, %xmm3
+; X64-NEXT:    andpd %xmm1, %xmm3
+; X64-NEXT:    andnpd %xmm0, %xmm2
+; X64-NEXT:    orpd %xmm3, %xmm2
+; X64-NEXT:    movapd %xmm1, %xmm3
+; X64-NEXT:    cmpunordsd %xmm1, %xmm3
+; X64-NEXT:    movapd %xmm3, %xmm0
+; X64-NEXT:    andpd %xmm2, %xmm0
+; X64-NEXT:    andnpd %xmm1, %xmm3
+; X64-NEXT:    orpd %xmm0, %xmm3
+; X64-NEXT:    movapd %xmm3, %xmm0
+; X64-NEXT:    cmpltsd %xmm2, %xmm0
+; X64-NEXT:    movapd %xmm0, %xmm1
+; X64-NEXT:    andpd %xmm2, %xmm1
+; X64-NEXT:    andnpd %xmm3, %xmm0
+; X64-NEXT:    orpd %xmm1, %xmm0
+; X64-NEXT:    movapd %xmm0, %xmm1
+; X64-NEXT:    cmpunordsd %xmm0, %xmm1
+; X64-NEXT:    movsd {{.*#+}} xmm4 = [NaN,0.0E+0]
+; X64-NEXT:    andpd %xmm1, %xmm4
+; X64-NEXT:    andnpd %xmm0, %xmm1
+; X64-NEXT:    orpd %xmm1, %xmm4
+; X64-NEXT:    xorpd %xmm1, %xmm1
+; X64-NEXT:    cmpeqsd %xmm4, %xmm1
+; X64-NEXT:    movq %xmm2, %rax
+; X64-NEXT:    testq %rax, %rax
+; X64-NEXT:    je .LBB3_2
+; X64-NEXT:  # %bb.1:
+; X64-NEXT:    movapd %xmm4, %xmm2
+; X64-NEXT:  .LBB3_2:
+; X64-NEXT:    movapd %xmm1, %xmm0
+; X64-NEXT:    andnpd %xmm4, %xmm0
+; X64-NEXT:    movq %xmm3, %rax
+; X64-NEXT:    testq %rax, %rax
+; X64-NEXT:    je .LBB3_4
+; X64-NEXT:  # %bb.3:
+; X64-NEXT:    movapd %xmm2, %xmm3
+; X64-NEXT:  .LBB3_4:
+; X64-NEXT:    andpd %xmm3, %xmm1
+; X64-NEXT:    orpd %xmm1, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: maximumnum_double:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm2
+; X64AVX-NEXT:    vcmpunordsd %xmm1, %xmm1, %xmm0
+; X64AVX-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
+; X64AVX-NEXT:    vcmpltsd %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT:    vblendvpd %xmm1, %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT:    vcmpunordsd %xmm1, %xmm1, %xmm3
+; X64AVX-NEXT:    vblendvpd %xmm3, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; X64AVX-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
+; X64AVX-NEXT:    vmovq %xmm2, %rax
+; X64AVX-NEXT:    testq %rax, %rax
+; X64AVX-NEXT:    je .LBB3_2
+; X64AVX-NEXT:  # %bb.1:
+; X64AVX-NEXT:    vmovapd %xmm1, %xmm2
+; X64AVX-NEXT:  .LBB3_2:
+; X64AVX-NEXT:    vcmpeqsd %xmm3, %xmm1, %xmm3
+; X64AVX-NEXT:    vmovq %xmm0, %rax
+; X64AVX-NEXT:    testq %rax, %rax
+; X64AVX-NEXT:    je .LBB3_4
+; X64AVX-NEXT:  # %bb.3:
+; X64AVX-NEXT:    vmovapd %xmm2, %xmm0
+; X64AVX-NEXT:  .LBB3_4:
+; X64AVX-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: maximumnum_double:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vcmpunordsd %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordsd %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovsd %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpltsd %xmm0, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovapd %xmm1, %xmm2
+; X64AVX512FP16-NEXT:    vmovsd %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordsd %xmm2, %xmm2, %k1
+; X64AVX512FP16-NEXT:    vmovsd {{.*#+}} xmm2 {%k1} = [NaN,0.0E+0]
+; X64AVX512FP16-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
+; X64AVX512FP16-NEXT:    vcmpeqsd %xmm3, %xmm2, %k1
+; X64AVX512FP16-NEXT:    vmovq %xmm0, %rax
+; X64AVX512FP16-NEXT:    testq %rax, %rax
+; X64AVX512FP16-NEXT:    sete %al
+; X64AVX512FP16-NEXT:    kmovd %eax, %k2
+; X64AVX512FP16-NEXT:    vmovapd %xmm2, %xmm3
+; X64AVX512FP16-NEXT:    vmovsd %xmm0, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT:    vmovq %xmm1, %rax
+; X64AVX512FP16-NEXT:    testq %rax, %rax
+; X64AVX512FP16-NEXT:    sete %al
+; X64AVX512FP16-NEXT:    kmovd %eax, %k2
+; X64AVX512FP16-NEXT:    vmovsd %xmm1, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT:    vmovsd %xmm3, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vmovapd %xmm2, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call double @llvm.maximumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @maximumnum_double_nsz(double %x, double %y) {
+; X64-LABEL: maximumnum_double_nsz:
+; X64:       # %bb.0:
+; X64-NEXT:    movapd %xmm0, %xmm2
+; X64-NEXT:    cmpunordsd %xmm0, %xmm2
+; X64-NEXT:    movapd %xmm2, %xmm3
+; X64-NEXT:    andpd %xmm1, %xmm3
+; X64-NEXT:    andnpd %xmm0, %xmm2
+; X64-NEXT:    orpd %xmm3, %xmm2
+; X64-NEXT:    movapd %xmm1, %xmm0
+; X64-NEXT:    cmpunordsd %xmm1, %xmm0
+; X64-NEXT:    movapd %xmm0, %xmm3
+; X64-NEXT:    andpd %xmm2, %xmm3
+; X64-NEXT:    andnpd %xmm1, %xmm0
+; X64-NEXT:    orpd %xmm3, %xmm0
+; X64-NEXT:    movapd %xmm0, %xmm1
+; X64-NEXT:    cmpltsd %xmm2, %xmm1
+; X64-NEXT:    andpd %xmm1, %xmm2
+; X64-NEXT:    andnpd %xmm0, %xmm1
+; X64-NEXT:    orpd %xmm2, %xmm1
+; X64-NEXT:    movapd %xmm1, %xmm2
+; X64-NEXT:    cmpunordsd %xmm1, %xmm2
+; X64-NEXT:    movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
+; X64-NEXT:    andpd %xmm2, %xmm0
+; X64-NEXT:    andnpd %xmm1, %xmm2
+; X64-NEXT:    orpd %xmm2, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: maximumnum_double_nsz:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; X64AVX-NEXT:    vcmpunordsd %xmm1, %xmm1, %xmm2
+; X64AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
+; X64AVX-NEXT:    vcmpltsd %xmm0, %xmm1, %xmm2
+; X64AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm1
+; X64AVX-NEXT:    vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: maximumnum_double_nsz:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vcmpunordsd %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordsd %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovsd %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpltsd %xmm0, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovsd %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordsd %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovsd {{.*#+}} xmm1 {%k1} = [NaN,0.0E+0]
+; X64AVX512FP16-NEXT:    vmovapd %xmm1, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call nsz double @llvm.maximumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @maximumnum_double_nnan(double %x, double %y) {
+; X64-LABEL: maximumnum_double_nnan:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %xmm0, %rax
+; X64-NEXT:    testq %rax, %rax
+; X64-NEXT:    js .LBB5_1
+; X64-NEXT:  # %bb.2:
+; X64-NEXT:    movdqa %xmm0, %xmm2
+; X64-NEXT:    jmp .LBB5_3
+; X64-NEXT:  .LBB5_1:
+; X64-NEXT:    movdqa %xmm1, %xmm2
+; X64-NEXT:    movdqa %xmm0, %xmm1
+; X64-NEXT:  .LBB5_3:
+; X64-NEXT:    maxsd %xmm2, %xmm1
+; X64-NEXT:    movapd %xmm1, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: maximumnum_double_nnan:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vmovq %xmm0, %rax
+; X64AVX-NEXT:    testq %rax, %rax
+; X64AVX-NEXT:    js .LBB5_1
+; X64AVX-NEXT:  # %bb.2:
+; X64AVX-NEXT:    vmaxsd %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT:    retq
+; X64AVX-NEXT:  .LBB5_1:
+; X64AVX-NEXT:    vmovapd %xmm1, %xmm2
+; X64AVX-NEXT:    vmaxsd %xmm2, %xmm0, %xmm0
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: maximumnum_double_nnan:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vfpclasssd $3, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovapd %xmm1, %xmm2
+; X64AVX512FP16-NEXT:    vmovsd %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vmaxsd %xmm2, %xmm0, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call nnan double @llvm.maximumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define float @minimumnum_float(float %x, float %y) {
+; X64-LABEL: minimumnum_float:
+; X64:       # %bb.0:
+; X64-NEXT:    movaps %xmm0, %xmm2
+; X64-NEXT:    cmpunordss %xmm0, %xmm2
+; X64-NEXT:    movaps %xmm2, %xmm3
+; X64-NEXT:    andps %xmm1, %xmm3
+; X64-NEXT:    andnps %xmm0, %xmm2
+; X64-NEXT:    orps %xmm3, %xmm2
+; X64-NEXT:    movaps %xmm1, %xmm3
+; X64-NEXT:    cmpunordss %xmm1, %xmm3
+; X64-NEXT:    movaps %xmm3, %xmm0
+; X64-NEXT:    andps %xmm2, %xmm0
+; X64-NEXT:    andnps %xmm1, %xmm3
+; X64-NEXT:    orps %xmm0, %xmm3
+; X64-NEXT:    movaps %xmm2, %xmm0
+; X64-NEXT:    cmpltss %xmm3, %xmm0
+; X64-NEXT:    movaps %xmm0, %xmm1
+; X64-NEXT:    andps %xmm2, %xmm1
+; X64-NEXT:    andnps %xmm3, %xmm0
+; X64-NEXT:    orps %xmm1, %xmm0
+; X64-NEXT:    movaps %xmm0, %xmm1
+; X64-NEXT:    cmpunordss %xmm0, %xmm1
+; X64-NEXT:    movss {{.*#+}} xmm4 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64-NEXT:    andps %xmm1, %xmm4
+; X64-NEXT:    andnps %xmm0, %xmm1
+; X64-NEXT:    orps %xmm1, %xmm4
+; X64-NEXT:    xorps %xmm1, %xmm1
+; X64-NEXT:    cmpeqss %xmm4, %xmm1
+; X64-NEXT:    movd %xmm2, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    jo .LBB6_2
+; X64-NEXT:  # %bb.1:
+; X64-NEXT:    movaps %xmm4, %xmm2
+; X64-NEXT:  .LBB6_2:
+; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    andnps %xmm4, %xmm0
+; X64-NEXT:    movd %xmm3, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    jo .LBB6_4
+; X64-NEXT:  # %bb.3:
+; X64-NEXT:    movaps %xmm2, %xmm3
+; X64-NEXT:  .LBB6_4:
+; X64-NEXT:    andps %xmm3, %xmm1
+; X64-NEXT:    orps %xmm1, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: minimumnum_float:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm2
+; X64AVX-NEXT:    vcmpunordss %xmm1, %xmm1, %xmm0
+; X64AVX-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; X64AVX-NEXT:    vcmpltss %xmm0, %xmm2, %xmm1
+; X64AVX-NEXT:    vblendvps %xmm1, %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT:    vcmpunordss %xmm1, %xmm1, %xmm3
+; X64AVX-NEXT:    vblendvps %xmm3, {{\.?LCPI[0-9]+_[0-9]+...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/104564


More information about the llvm-commits mailing list