[llvm] [X86][AVX10.2] Lower fminimum/fmaximum to VMINMAX* (PR #121373)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 30 21:31:44 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Phoebe Wang (phoebewang)
<details>
<summary>Changes</summary>
---
Patch is 24.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121373.diff
4 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+18)
- (modified) llvm/lib/Target/X86/X86InstrAVX10.td (+33-19)
- (modified) llvm/test/CodeGen/X86/fminimum-fmaximum.ll (+192)
- (modified) llvm/test/TableGen/x86-fold-tables.inc (+12-9)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e7f6032ee7d749..a0514e93d6598b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2442,6 +2442,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSQRT, VT, Legal);
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::FMINIMUM, VT, Custom);
+ setOperationAction(ISD::FMAXIMUM, VT, Custom);
}
if (Subtarget.hasAVX10_2_512()) {
setOperationAction(ISD::FADD, MVT::v32bf16, Legal);
@@ -2451,6 +2453,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v32bf16, Legal);
setOperationAction(ISD::FMA, MVT::v32bf16, Legal);
setOperationAction(ISD::SETCC, MVT::v32bf16, Custom);
+ setOperationAction(ISD::FMINIMUM, MVT::v32bf16, Custom);
+ setOperationAction(ISD::FMAXIMUM, MVT::v32bf16, Custom);
}
for (auto VT : {MVT::f16, MVT::f32, MVT::f64}) {
setCondCodeAction(ISD::SETOEQ, VT, Custom);
@@ -28842,6 +28846,20 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
SDValue X = Op.getOperand(0);
SDValue Y = Op.getOperand(1);
SDLoc DL(Op);
+ if (Subtarget.hasAVX10_2() && TLI.isTypeLegal(VT)) {
+ unsigned Opc = 0;
+ if (VT.isVector())
+ Opc = X86ISD::VMINMAX;
+ else if (VT == MVT::f16 || VT == MVT::f32 || VT == MVT::f64)
+ Opc = X86ISD::VMINMAXS;
+
+ if (Opc) {
+ SDValue Imm =
+ DAG.getTargetConstant(Op.getOpcode() == ISD::FMAXIMUM, DL, MVT::i32);
+ return DAG.getNode(Opc, DL, VT, X, Y, Imm, Op->getFlags());
+ }
+ }
+
uint64_t SizeInBits = VT.getScalarSizeInBits();
APInt PreferredZero = APInt::getZero(SizeInBits);
APInt OppositeZero = PreferredZero;
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 0301c07dfb540b..3bc64eda01a9ce 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -403,28 +403,42 @@ multiclass avx10_minmax_scalar<string OpStr, X86VectorVTInfo _, SDNode OpNode,
SDNode OpNodeSAE> {
let ExeDomain = _.ExeDomain, Predicates = [HasAVX10_2] in {
let mayRaiseFPException = 1 in {
- defm rri : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
- (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
- OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
- (_.VT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- (i32 timm:$src3)))>,
- Sched<[WriteFMAX]>;
-
- defm rmi : AVX512_maskable<0x53, MRMSrcMem, _, (outs VR128X:$dst),
- (ins VR128X:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
- OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
- (_.VT (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
- (i32 timm:$src3)))>,
+ let isCodeGenOnly = 1 in {
+ def rri : AVX512Ii8<0x53, MRMSrcReg, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
+ !strconcat(OpStr, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2, (i32 timm:$src3)))]>,
+ Sched<[WriteFMAX]>;
+
+ def rmi : AVX512Ii8<0x53, MRMSrcMem, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
+ !strconcat(OpStr, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2),
+ (i32 timm:$src3)))]>,
+ Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
+ }
+ defm rri_Int : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
+ (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
+ OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (_.VT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (i32 timm:$src3)))>,
+ Sched<[WriteFMAX]>;
+
+ defm rmi_Int : AVX512_maskable<0x53, MRMSrcMem, _, (outs VR128X:$dst),
+ (ins VR128X:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
+ OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (_.VT (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
+ (i32 timm:$src3)))>,
Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
}
let Uses = []<Register>, mayRaiseFPException = 0 in
- defm rrib : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
- (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
- OpStr, "$src3, {sae}, $src2, $src1",
- "$src1, $src2, {sae}, $src3",
- (_.VT (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- (i32 timm:$src3)))>,
- Sched<[WriteFMAX]>, EVEX_B;
+ defm rrib_Int : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
+ (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
+ OpStr, "$src3, {sae}, $src2, $src1",
+ "$src1, $src2, {sae}, $src3",
+ (_.VT (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (i32 timm:$src3)))>,
+ Sched<[WriteFMAX]>, EVEX_B;
}
}
diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
index c6da0c5ca4792c..1dcce5336895f0 100644
--- a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
@@ -3,6 +3,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX10_2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86
declare float @llvm.maximum.f32(float, float)
@@ -73,6 +74,11 @@ define float @test_fmaximum(float %x, float %y) nounwind {
; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxss $1, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
@@ -110,6 +116,11 @@ define <4 x float> @test_fmaximum_scalarize(<4 x float> %x, <4 x float> %y) "no-
; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_scalarize:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxps $1, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_scalarize:
; X86: # %bb.0:
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0
@@ -129,6 +140,11 @@ define float @test_fmaximum_nan0(float %x, float %y) {
; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_nan0:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_nan0:
; X86: # %bb.0:
; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
@@ -148,6 +164,11 @@ define float @test_fmaximum_nan1(float %x, float %y) {
; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_nan1:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_nan1:
; X86: # %bb.0:
; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
@@ -215,6 +236,13 @@ define float @test_fmaximum_nnan(float %x, float %y) nounwind {
; AVX512DQ-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_nnan:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vaddss %xmm1, %xmm0, %xmm2
+; AVX10_2-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: vminmaxss $1, %xmm0, %xmm2
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_nnan:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
@@ -272,6 +300,12 @@ define double @test_fmaximum_zero0(double %x, double %y) nounwind {
; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_zero0:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorpd %xmm0, %xmm0, %xmm0
+; AVX10_2-NEXT: vminmaxsd $1, %xmm0, %xmm1
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_zero0:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
@@ -323,6 +357,12 @@ define double @test_fmaximum_zero1(double %x, double %y) nounwind {
; AVX512-NEXT: vmovapd %xmm1, %xmm0
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_zero1:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX10_2-NEXT: vminmaxsd $1, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_zero1:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
@@ -354,6 +394,11 @@ define double @test_fmaximum_zero2(double %x, double %y) {
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_zero2:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_zero2:
; X86: # %bb.0:
; X86-NEXT: fldz
@@ -390,6 +435,11 @@ define float @test_fmaximum_nsz(float %x, float %y) "no-signed-zeros-fp-math"="t
; AVX512-NEXT: vmovaps %xmm1, %xmm0
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_nsz:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxss $1, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_nsz:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
@@ -474,6 +524,12 @@ define float @test_fmaximum_combine_cmps(float %x, float %y) nounwind {
; AVX512DQ-NEXT: vmaxss %xmm2, %xmm0, %xmm0
; AVX512DQ-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_combine_cmps:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vdivss %xmm0, %xmm1, %xmm1
+; AVX10_2-NEXT: vminmaxss $1, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_combine_cmps:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
@@ -562,6 +618,11 @@ define float @test_fminimum(float %x, float %y) nounwind {
; AVX512-NEXT: vmovaps %xmm1, %xmm0
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxss $0, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
@@ -599,6 +660,11 @@ define <2 x double> @test_fminimum_scalarize(<2 x double> %x, <2 x double> %y) "
; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_scalarize:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_scalarize:
; X86: # %bb.0:
; X86-NEXT: vminpd %xmm1, %xmm0, %xmm0
@@ -618,6 +684,11 @@ define float @test_fminimum_nan0(float %x, float %y) {
; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_nan0:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_nan0:
; X86: # %bb.0:
; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
@@ -637,6 +708,11 @@ define float @test_fminimum_nan1(float %x, float %y) {
; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_nan1:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_nan1:
; X86: # %bb.0:
; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
@@ -695,6 +771,11 @@ define double @test_fminimum_nnan(double %x, double %y) "no-nans-fp-math"="true"
; AVX512DQ-NEXT: vminsd %xmm2, %xmm1, %xmm0
; AVX512DQ-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_nnan:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxsd $0, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_nnan:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
@@ -749,6 +830,11 @@ define double @test_fminimum_zero0(double %x, double %y) nounwind {
; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_zero0:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxsd $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_zero0:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
@@ -796,6 +882,11 @@ define double @test_fminimum_zero1(double %x, double %y) nounwind {
; AVX512-NEXT: vmovapd %xmm1, %xmm0
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_zero1:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxsd $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_zero1:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
@@ -826,6 +917,11 @@ define double @test_fminimum_zero2(double %x, double %y) {
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0]
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_zero2:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vmovsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0]
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_zero2:
; X86: # %bb.0:
; X86-NEXT: fldz
@@ -863,6 +959,11 @@ define float @test_fminimum_nsz(float %x, float %y) nounwind {
; AVX512-NEXT: vmovaps %xmm1, %xmm0
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_nsz:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxss $0, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_nsz:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
@@ -948,6 +1049,12 @@ define float @test_fminimum_combine_cmps(float %x, float %y) nounwind {
; AVX512DQ-NEXT: vminss %xmm2, %xmm0, %xmm0
; AVX512DQ-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_combine_cmps:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vdivss %xmm0, %xmm1, %xmm1
+; AVX10_2-NEXT: vminmaxss $0, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_combine_cmps:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
@@ -1009,6 +1116,11 @@ define <2 x double> @test_fminimum_vector(<2 x double> %x, <2 x double> %y) {
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_vector:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_vector:
; X86: # %bb.0:
; X86-NEXT: vblendvpd %xmm0, %xmm0, %xmm1, %xmm2
@@ -1032,6 +1144,11 @@ define <4 x float> @test_fmaximum_vector(<4 x float> %x, <4 x float> %y) "no-nan
; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_vector:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxps $1, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_vector:
; X86: # %bb.0:
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0
@@ -1054,6 +1171,12 @@ define <2 x double> @test_fminimum_vector_zero(<2 x double> %x) {
; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_vector_zero:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_vector_zero:
; X86: # %bb.0:
; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1
@@ -1077,6 +1200,11 @@ define <4 x float> @test_fmaximum_vector_signed_zero(<4 x float> %x) {
; AVX-NEXT: vmaxps %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_vector_signed_zero:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxps $1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_vector_signed_zero:
; X86: # %bb.0:
; X86-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
@@ -1102,6 +1230,13 @@ define <2 x double> @test_fminimum_vector_partially_zero(<2 x double> %x) {
; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_vector_partially_zero:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX10_2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
+; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_vector_partially_zero:
; X86: # %bb.0:
; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1
@@ -1149,6 +1284,13 @@ define <2 x double> @test_fminimum_vector_different_zeros(<2 x double> %x) {
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_vector_different_zeros:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX10_2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
+; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_vector_different_zeros:
; X86: # %bb.0:
; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1
@@ -1177,6 +1319,11 @@ define <4 x float> @test_fmaximum_vector_non_zero(<4 x float> %x) {
; AVX-NEXT: vmaxps %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_vector_non_zero:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxps $1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_vector_non_zero:
; X86: # %bb.0:
; X86-NEXT: vmovaps {{.*#+}} xmm1 = [5.0E+0,4.0E+0,3.0E+0,2.0E+0]
@@ -1206,6 +1353,13 @@ define <2 x double> @test_fminimum_vector_nan(<2 x double> %x) {
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_vector_nan:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX10_2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
+; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_vector_nan:
; X86: # %bb.0:
; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1
@@ -1232,6 +1386,12 @@ define <2 x double> @test_fminimum_vector_zero_first(<2 x double> %x) {
; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_vector_zero_first:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_vector_zero_first:
; X86: # %bb.0:
; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1
@@ -1260,6 +1420,11 @@ define <2 x double> @test_fminimum_vector_signed_zero(<2 x double> %x) {
; AVX-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_vector_signed_zero:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxpd $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_vector_signed_zero:
; X86: # %bb.0:
; X86-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm1
@@ -1284,6 +1449,11 @@ define <4 x float> @test_fmaximum_vector_signed_zero_first(<4 x float> %x) {
; AVX-NEXT...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/121373
More information about the llvm-commits
mailing list