[llvm] [X86] Support lowering of FMINIMUMNUM/FMAXIMUMNUM (PR #121464)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 2 02:08:37 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
Author: Phoebe Wang (phoebewang)
<details>
<summary>Changes</summary>
---
Patch is 103.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121464.diff
4 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp (+2)
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (+2)
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+35-5)
- (added) llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll (+2553)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index db21e708970648..de1bec1c130d20 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -402,6 +402,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FMAXNUM_IEEE:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
+ case ISD::FMINIMUMNUM:
+ case ISD::FMAXIMUMNUM:
case ISD::FCOPYSIGN:
case ISD::FSQRT:
case ISD::FSIN:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 107454a92e356c..780eba16c9c498 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -149,6 +149,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMAXNUM_IEEE:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
+ case ISD::FMINIMUMNUM:
+ case ISD::FMAXIMUMNUM:
case ISD::FLDEXP:
case ISD::ABDS:
case ISD::ABDU:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a0514e93d6598b..219674b1e84131 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -623,6 +623,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMAXNUM, VT, Action);
setOperationAction(ISD::FMINIMUM, VT, Action);
setOperationAction(ISD::FMAXIMUM, VT, Action);
+ setOperationAction(ISD::FMINIMUMNUM, VT, Action);
+ setOperationAction(ISD::FMAXIMUMNUM, VT, Action);
setOperationAction(ISD::FSIN, VT, Action);
setOperationAction(ISD::FCOS, VT, Action);
setOperationAction(ISD::FSINCOS, VT, Action);
@@ -1066,6 +1068,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMAXIMUM, MVT::f32, Custom);
setOperationAction(ISD::FMINIMUM, MVT::f32, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, MVT::f32, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, MVT::f32, Custom);
setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
setOperationAction(ISD::FABS, MVT::v4f32, Custom);
@@ -1108,6 +1112,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::f64, MVT::v4f32, MVT::v2f64 }) {
setOperationAction(ISD::FMAXIMUM, VT, Custom);
setOperationAction(ISD::FMINIMUM, VT, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, VT, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, VT, Custom);
}
for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
@@ -1473,6 +1479,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMAXIMUM, VT, Custom);
setOperationAction(ISD::FMINIMUM, VT, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, VT, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, VT, Custom);
setOperationAction(ISD::FCANONICALIZE, VT, Custom);
}
@@ -1818,6 +1826,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::FMAXIMUM, VT, Custom);
setOperationAction(ISD::FMINIMUM, VT, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, VT, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, VT, Custom);
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FMA, VT, Legal);
@@ -2289,6 +2299,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::FMAXIMUM, MVT::f16, Custom);
setOperationAction(ISD::FMINIMUM, MVT::f16, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, MVT::f16, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, MVT::f16, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
@@ -2336,6 +2348,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMINIMUM, MVT::v32f16, Custom);
setOperationAction(ISD::FMAXIMUM, MVT::v32f16, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, MVT::v32f16, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, MVT::v32f16, Custom);
}
if (Subtarget.hasVLX()) {
@@ -2383,9 +2397,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMINIMUM, MVT::v8f16, Custom);
setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, MVT::v8f16, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, MVT::v8f16, Custom);
setOperationAction(ISD::FMINIMUM, MVT::v16f16, Custom);
setOperationAction(ISD::FMAXIMUM, MVT::v16f16, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, MVT::v16f16, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, MVT::v16f16, Custom);
}
}
@@ -2444,6 +2462,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::FMINIMUM, VT, Custom);
setOperationAction(ISD::FMAXIMUM, VT, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, VT, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, VT, Custom);
}
if (Subtarget.hasAVX10_2_512()) {
setOperationAction(ISD::FADD, MVT::v32bf16, Legal);
@@ -2455,6 +2475,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SETCC, MVT::v32bf16, Custom);
setOperationAction(ISD::FMINIMUM, MVT::v32bf16, Custom);
setOperationAction(ISD::FMAXIMUM, MVT::v32bf16, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, MVT::v32bf16, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, MVT::v32bf16, Custom);
}
for (auto VT : {MVT::f16, MVT::f32, MVT::f64}) {
setCondCodeAction(ISD::SETOEQ, VT, Custom);
@@ -28839,13 +28861,15 @@ static SDValue LowerMINMAX(SDValue Op, const X86Subtarget &Subtarget,
static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- assert((Op.getOpcode() == ISD::FMAXIMUM || Op.getOpcode() == ISD::FMINIMUM) &&
- "Expected FMAXIMUM or FMINIMUM opcode");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = Op.getValueType();
SDValue X = Op.getOperand(0);
SDValue Y = Op.getOperand(1);
SDLoc DL(Op);
+ bool IsMaxOp =
+ Op.getOpcode() == ISD::FMAXIMUM || Op.getOpcode() == ISD::FMAXIMUMNUM;
+ bool IsNum =
+ Op.getOpcode() == ISD::FMINIMUMNUM || Op.getOpcode() == ISD::FMAXIMUMNUM;
if (Subtarget.hasAVX10_2() && TLI.isTypeLegal(VT)) {
unsigned Opc = 0;
if (VT.isVector())
@@ -28855,7 +28879,7 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
if (Opc) {
SDValue Imm =
- DAG.getTargetConstant(Op.getOpcode() == ISD::FMAXIMUM, DL, MVT::i32);
+ DAG.getTargetConstant(IsMaxOp + (IsNum ? 16 : 0), DL, MVT::i32);
return DAG.getNode(Opc, DL, VT, X, Y, Imm, Op->getFlags());
}
}
@@ -28865,7 +28889,7 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
APInt OppositeZero = PreferredZero;
EVT IVT = VT.changeTypeToInteger();
X86ISD::NodeType MinMaxOp;
- if (Op.getOpcode() == ISD::FMAXIMUM) {
+ if (IsMaxOp) {
MinMaxOp = X86ISD::FMAX;
OppositeZero.setSignBit();
} else {
@@ -28995,7 +29019,9 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
if (IgnoreNaN || DAG.isKnownNeverNaN(NewX))
return MinMax;
- SDValue IsNaN = DAG.getSetCC(DL, SetCCType, NewX, NewX, ISD::SETUO);
+ SDValue IsNaN =
+ DAG.getSetCC(DL, SetCCType, NewX, NewX, IsNum ? ISD::SETO : ISD::SETUO);
+
return DAG.getSelect(DL, VT, IsNaN, NewX, MinMax);
}
@@ -33253,6 +33279,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UMIN: return LowerMINMAX(Op, Subtarget, DAG);
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
+ case ISD::FMINIMUMNUM:
+ case ISD::FMAXIMUMNUM:
return LowerFMINIMUM_FMAXIMUM(Op, Subtarget, DAG);
case ISD::ABS: return LowerABS(Op, Subtarget, DAG);
case ISD::ABDS:
@@ -45994,6 +46022,8 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG,
case ISD::FMAXNUM_IEEE:
case ISD::FMAXIMUM:
case ISD::FMINIMUM:
+ case ISD::FMAXIMUMNUM:
+ case ISD::FMINIMUMNUM:
case X86ISD::FMAX:
case X86ISD::FMIN:
case ISD::FABS: // Begin 1 operand
diff --git a/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll b/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll
new file mode 100644
index 00000000000000..b183994280a474
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll
@@ -0,0 +1,2553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX10_2
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86
+
+declare float @llvm.maximumnum.f32(float, float)
+declare double @llvm.maximumnum.f64(double, double)
+declare float @llvm.minimumnum.f32(float, float)
+declare double @llvm.minimumnum.f64(double, double)
+declare <2 x double> @llvm.minimumnum.v2f64(<2 x double>, <2 x double>)
+declare <4 x float> @llvm.maximumnum.v4f32(<4 x float>, <4 x float>)
+declare <4 x half> @llvm.maximumnum.v4f16(<4 x half>, <4 x half>)
+declare <4 x bfloat> @llvm.maximumnum.v4bf16(<4 x bfloat>, <4 x bfloat>)
+
+;
+; fmaximumnum
+;
+
+define float @test_fmaximumnum(float %x, float %y) nounwind {
+; SSE2-LABEL: test_fmaximumnum:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: testl %eax, %eax
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: js .LBB0_2
+; SSE2-NEXT: # %bb.1:
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: .LBB0_2:
+; SSE2-NEXT: movdqa %xmm3, %xmm0
+; SSE2-NEXT: cmpordss %xmm3, %xmm0
+; SSE2-NEXT: movaps %xmm0, %xmm4
+; SSE2-NEXT: andps %xmm3, %xmm4
+; SSE2-NEXT: js .LBB0_4
+; SSE2-NEXT: # %bb.3:
+; SSE2-NEXT: movdqa %xmm2, %xmm1
+; SSE2-NEXT: .LBB0_4:
+; SSE2-NEXT: maxss %xmm1, %xmm3
+; SSE2-NEXT: andnps %xmm3, %xmm0
+; SSE2-NEXT: orps %xmm4, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX1-LABEL: test_fmaximumnum:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: testl %eax, %eax
+; AVX1-NEXT: js .LBB0_1
+; AVX1-NEXT: # %bb.2:
+; AVX1-NEXT: vmovdqa %xmm0, %xmm2
+; AVX1-NEXT: jmp .LBB0_3
+; AVX1-NEXT: .LBB0_1:
+; AVX1-NEXT: vmovdqa %xmm1, %xmm2
+; AVX1-NEXT: vmovdqa %xmm0, %xmm1
+; AVX1-NEXT: .LBB0_3:
+; AVX1-NEXT: vmaxss %xmm2, %xmm1, %xmm0
+; AVX1-NEXT: vcmpordss %xmm1, %xmm1, %xmm2
+; AVX1-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: test_fmaximumnum:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: testl %eax, %eax
+; AVX512-NEXT: sets %al
+; AVX512-NEXT: kmovw %eax, %k1
+; AVX512-NEXT: vmovdqa %xmm0, %xmm2
+; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
+; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; AVX512-NEXT: vmaxss %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: vcmpordss %xmm1, %xmm1, %k1
+; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; AVX512-NEXT: retq
+;
+; AVX10_2-LABEL: test_fmaximumnum:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxss $17, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
+; X86-LABEL: test_fmaximumnum:
+; X86: # %bb.0:
+; X86-NEXT: pushl %eax
+; X86-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-NEXT: vmovd %xmm2, %eax
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: js .LBB0_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: vmovdqa %xmm2, %xmm1
+; X86-NEXT: jmp .LBB0_3
+; X86-NEXT: .LBB0_1:
+; X86-NEXT: vmovdqa %xmm0, %xmm1
+; X86-NEXT: vmovdqa %xmm2, %xmm0
+; X86-NEXT: .LBB0_3:
+; X86-NEXT: vmaxss %xmm1, %xmm0, %xmm1
+; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2
+; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; X86-NEXT: vmovss %xmm0, (%esp)
+; X86-NEXT: flds (%esp)
+; X86-NEXT: popl %eax
+; X86-NEXT: retl
+ %1 = tail call float @llvm.maximumnum.f32(float %x, float %y)
+ ret float %1
+}
+
+define <4 x float> @test_fmaximumnum_scalarize(<4 x float> %x, <4 x float> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
+; SSE2-LABEL: test_fmaximumnum_scalarize:
+; SSE2: # %bb.0:
+; SSE2-NEXT: maxps %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: test_fmaximumnum_scalarize:
+; AVX: # %bb.0:
+; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX10_2-LABEL: test_fmaximumnum_scalarize:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxps $17, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
+; X86-LABEL: test_fmaximumnum_scalarize:
+; X86: # %bb.0:
+; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; X86-NEXT: retl
+ %r = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> %x, <4 x float> %y)
+ ret <4 x float> %r
+}
+
+define float @test_fmaximumnum_nan0(float %x, float %y) {
+; SSE2-LABEL: test_fmaximumnum_nan0:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: test_fmaximumnum_nan0:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovaps %xmm1, %xmm0
+; AVX-NEXT: retq
+;
+; AVX10_2-LABEL: test_fmaximumnum_nan0:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vmovaps %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
+; X86-LABEL: test_fmaximumnum_nan0:
+; X86: # %bb.0:
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: retl
+ %1 = tail call float @llvm.maximumnum.f32(float 0x7fff000000000000, float %y)
+ ret float %1
+}
+
+define float @test_fmaximumnum_nan1(float %x, float %y) {
+; SSE2-LABEL: test_fmaximumnum_nan1:
+; SSE2: # %bb.0:
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: test_fmaximumnum_nan1:
+; AVX: # %bb.0:
+; AVX-NEXT: retq
+;
+; AVX10_2-LABEL: test_fmaximumnum_nan1:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: retq
+;
+; X86-LABEL: test_fmaximumnum_nan1:
+; X86: # %bb.0:
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: retl
+ %1 = tail call float @llvm.maximumnum.f32(float %x, float 0x7fff000000000000)
+ ret float %1
+}
+
+define float @test_fmaximumnum_nnan(float %x, float %y) nounwind {
+; SSE2-LABEL: test_fmaximumnum_nnan:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movaps %xmm0, %xmm2
+; SSE2-NEXT: addss %xmm1, %xmm2
+; SSE2-NEXT: subss %xmm1, %xmm0
+; SSE2-NEXT: movd %xmm2, %eax
+; SSE2-NEXT: testl %eax, %eax
+; SSE2-NEXT: js .LBB4_1
+; SSE2-NEXT: # %bb.2:
+; SSE2-NEXT: maxss %xmm2, %xmm0
+; SSE2-NEXT: retq
+; SSE2-NEXT: .LBB4_1:
+; SSE2-NEXT: movaps %xmm0, %xmm1
+; SSE2-NEXT: movaps %xmm2, %xmm0
+; SSE2-NEXT: maxss %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX1-LABEL: test_fmaximumnum_nnan:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovd %xmm2, %eax
+; AVX1-NEXT: testl %eax, %eax
+; AVX1-NEXT: js .LBB4_1
+; AVX1-NEXT: # %bb.2:
+; AVX1-NEXT: vmaxss %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: retq
+; AVX1-NEXT: .LBB4_1:
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmaxss %xmm1, %xmm2, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX512F-LABEL: test_fmaximumnum_nnan:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vaddss %xmm1, %xmm0, %xmm2
+; AVX512F-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vmovd %xmm2, %eax
+; AVX512F-NEXT: testl %eax, %eax
+; AVX512F-NEXT: sets %al
+; AVX512F-NEXT: kmovw %eax, %k1
+; AVX512F-NEXT: vmovaps %xmm2, %xmm1
+; AVX512F-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; AVX512F-NEXT: vmovss %xmm2, %xmm0, %xmm0 {%k1}
+; AVX512F-NEXT: vmaxss %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512DQ-LABEL: test_fmaximumnum_nnan:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vaddss %xmm1, %xmm0, %xmm2
+; AVX512DQ-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: vfpclassss $3, %xmm0, %k0 # k0 = isQuietNaN(xmm0) | isPositiveZero(xmm0)
+; AVX512DQ-NEXT: kmovw %k0, %k1
+; AVX512DQ-NEXT: vmovaps %xmm2, %xmm1
+; AVX512DQ-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; AVX512DQ-NEXT: vmovss %xmm2, %xmm0, %xmm0 {%k1}
+; AVX512DQ-NEXT: vmaxss %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
+;
+; AVX10_2-LABEL: test_fmaximumnum_nnan:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vaddss %xmm1, %xmm0, %xmm2
+; AVX10_2-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: vminmaxss $17, %xmm0, %xmm2
+; AVX10_2-NEXT: retq
+;
+; X86-LABEL: test_fmaximumnum_nnan:
+; X86: # %bb.0:
+; X86-NEXT: pushl %eax
+; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-NEXT: vaddss %xmm0, %xmm2, %xmm1
+; X86-NEXT: vsubss %xmm0, %xmm2, %xmm0
+; X86-NEXT: vmovd %xmm1, %eax
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: js .LBB4_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: vmovaps %xmm1, %xmm2
+; X86-NEXT: jmp .LBB4_3
+; X86-NEXT: .LBB4_1:
+; X86-NEXT: vmovaps %xmm0, %xmm2
+; X86-NEXT: vmovaps %xmm1, %xmm0
+; X86-NEXT: .LBB4_3:
+; X86-NEXT: vmaxss %xmm2, %xmm0, %xmm0
+; X86-NEXT: vmovss %xmm0, (%esp)
+; X86-NEXT: flds (%esp)
+; X86-NEXT: popl %eax
+; X86-NEXT: retl
+ %1 = fadd nnan float %x, %y
+ %2 = fsub nnan float %x, %y
+ %3 = tail call float @llvm.maximumnum.f32(float %1, float %2)
+ ret float %3
+}
+
+define double @test_fmaximumnum_zero0(double %x, double %y) nounwind {
+; SSE2-LABEL: test_fmaximumnum_zero0:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: cmpordsd %xmm1, %xmm0
+; SSE2-NEXT: movapd %xmm0, %xmm2
+; SSE2-NEXT: andpd %xmm1, %xmm2
+; SSE2-NEXT: xorpd %xmm3, %xmm3
+; SSE2-NEXT: maxsd %xmm3, %xmm1
+; SSE2-NEXT: andnpd %xmm1, %xmm0
+; SSE2-NEXT: orpd %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX1-LABEL: test_fmaximumnum_zero0:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vxorpd %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vmaxsd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vcmpordsd %xmm1, %xmm1, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: test_fmaximumnum_zero0:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorpd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vcmpordsd %xmm1, %xmm1, %k1
+; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; AVX512-NEXT: retq
+;
+; AVX10_2-LABEL: test_fmaximumnum_zero0:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorpd %xmm0, %xmm0, %xmm0
+; AVX10_2-NEXT: vminmaxsd $17, %xmm0, %xmm1
+; AVX10_2-NEXT: retq
+;
+; X86-LABEL: test_fmaximumnum_zero0:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; X86-NEXT: vmaxsd %xmm1, %xmm0, %xmm1
+; X86-NEXT: vcmpordsd %xmm0, %xmm0, %xmm2
+; X86-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; X86-NEXT: vmovlpd %xmm0, (%esp)
+; X86-NEXT: fldl (%esp)
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+ %1 = tail call double @llvm.maximumnum.f64(double 0.0, double %y)
+ ret double %1
+}
+
+define double @test_fmaximumnum_zero1(double %x, double %y) nounwind {
+; SSE2-LABEL: test_fmaximumnum_zero1:
+; SSE2: ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/121464
More information about the llvm-commits
mailing list