[llvm] [X86][AVX10.2] Lower fmininum/fmaximum to VMINMAX* (PR #121373)
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 30 21:31:10 PST 2024
https://github.com/phoebewang created https://github.com/llvm/llvm-project/pull/121373
None
>From a01d0bc0df4e5c568416380efd727145f0053814 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Tue, 31 Dec 2024 13:28:03 +0800
Subject: [PATCH] [X86][AVX10.2] Lower fmininum/fmaximum to VMINMAX*
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 18 ++
llvm/lib/Target/X86/X86InstrAVX10.td | 52 ++++--
llvm/test/CodeGen/X86/fminimum-fmaximum.ll | 192 +++++++++++++++++++++
llvm/test/TableGen/x86-fold-tables.inc | 21 ++-
4 files changed, 255 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e7f6032ee7d749..a0514e93d6598b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2442,6 +2442,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSQRT, VT, Legal);
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::FMINIMUM, VT, Custom);
+ setOperationAction(ISD::FMAXIMUM, VT, Custom);
}
if (Subtarget.hasAVX10_2_512()) {
setOperationAction(ISD::FADD, MVT::v32bf16, Legal);
@@ -2451,6 +2453,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v32bf16, Legal);
setOperationAction(ISD::FMA, MVT::v32bf16, Legal);
setOperationAction(ISD::SETCC, MVT::v32bf16, Custom);
+ setOperationAction(ISD::FMINIMUM, MVT::v32bf16, Custom);
+ setOperationAction(ISD::FMAXIMUM, MVT::v32bf16, Custom);
}
for (auto VT : {MVT::f16, MVT::f32, MVT::f64}) {
setCondCodeAction(ISD::SETOEQ, VT, Custom);
@@ -28842,6 +28846,20 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
SDValue X = Op.getOperand(0);
SDValue Y = Op.getOperand(1);
SDLoc DL(Op);
+ if (Subtarget.hasAVX10_2() && TLI.isTypeLegal(VT)) {
+ unsigned Opc = 0;
+ if (VT.isVector())
+ Opc = X86ISD::VMINMAX;
+ else if (VT == MVT::f16 || VT == MVT::f32 || VT == MVT::f64)
+ Opc = X86ISD::VMINMAXS;
+
+ if (Opc) {
+ SDValue Imm =
+ DAG.getTargetConstant(Op.getOpcode() == ISD::FMAXIMUM, DL, MVT::i32);
+ return DAG.getNode(Opc, DL, VT, X, Y, Imm, Op->getFlags());
+ }
+ }
+
uint64_t SizeInBits = VT.getScalarSizeInBits();
APInt PreferredZero = APInt::getZero(SizeInBits);
APInt OppositeZero = PreferredZero;
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 0301c07dfb540b..3bc64eda01a9ce 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -403,28 +403,42 @@ multiclass avx10_minmax_scalar<string OpStr, X86VectorVTInfo _, SDNode OpNode,
SDNode OpNodeSAE> {
let ExeDomain = _.ExeDomain, Predicates = [HasAVX10_2] in {
let mayRaiseFPException = 1 in {
- defm rri : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
- (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
- OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
- (_.VT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- (i32 timm:$src3)))>,
- Sched<[WriteFMAX]>;
-
- defm rmi : AVX512_maskable<0x53, MRMSrcMem, _, (outs VR128X:$dst),
- (ins VR128X:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
- OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
- (_.VT (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
- (i32 timm:$src3)))>,
+ let isCodeGenOnly = 1 in {
+ def rri : AVX512Ii8<0x53, MRMSrcReg, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
+ !strconcat(OpStr, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2, (i32 timm:$src3)))]>,
+ Sched<[WriteFMAX]>;
+
+ def rmi : AVX512Ii8<0x53, MRMSrcMem, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
+ !strconcat(OpStr, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2),
+ (i32 timm:$src3)))]>,
+ Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
+ }
+ defm rri_Int : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
+ (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
+ OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (_.VT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (i32 timm:$src3)))>,
+ Sched<[WriteFMAX]>;
+
+ defm rmi_Int : AVX512_maskable<0x53, MRMSrcMem, _, (outs VR128X:$dst),
+ (ins VR128X:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
+ OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (_.VT (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
+ (i32 timm:$src3)))>,
Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
}
let Uses = []<Register>, mayRaiseFPException = 0 in
- defm rrib : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
- (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
- OpStr, "$src3, {sae}, $src2, $src1",
- "$src1, $src2, {sae}, $src3",
- (_.VT (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- (i32 timm:$src3)))>,
- Sched<[WriteFMAX]>, EVEX_B;
+ defm rrib_Int : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
+ (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
+ OpStr, "$src3, {sae}, $src2, $src1",
+ "$src1, $src2, {sae}, $src3",
+ (_.VT (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (i32 timm:$src3)))>,
+ Sched<[WriteFMAX]>, EVEX_B;
}
}
diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
index c6da0c5ca4792c..1dcce5336895f0 100644
--- a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
@@ -3,6 +3,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX10_2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86
declare float @llvm.maximum.f32(float, float)
@@ -73,6 +74,11 @@ define float @test_fmaximum(float %x, float %y) nounwind {
; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxss $1, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
@@ -110,6 +116,11 @@ define <4 x float> @test_fmaximum_scalarize(<4 x float> %x, <4 x float> %y) "no-
; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_scalarize:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxps $1, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_scalarize:
; X86: # %bb.0:
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0
@@ -129,6 +140,11 @@ define float @test_fmaximum_nan0(float %x, float %y) {
; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_nan0:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_nan0:
; X86: # %bb.0:
; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
@@ -148,6 +164,11 @@ define float @test_fmaximum_nan1(float %x, float %y) {
; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_nan1:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_nan1:
; X86: # %bb.0:
; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
@@ -215,6 +236,13 @@ define float @test_fmaximum_nnan(float %x, float %y) nounwind {
; AVX512DQ-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_nnan:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vaddss %xmm1, %xmm0, %xmm2
+; AVX10_2-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: vminmaxss $1, %xmm0, %xmm2
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_nnan:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
@@ -272,6 +300,12 @@ define double @test_fmaximum_zero0(double %x, double %y) nounwind {
; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_zero0:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorpd %xmm0, %xmm0, %xmm0
+; AVX10_2-NEXT: vminmaxsd $1, %xmm0, %xmm1
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_zero0:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
@@ -323,6 +357,12 @@ define double @test_fmaximum_zero1(double %x, double %y) nounwind {
; AVX512-NEXT: vmovapd %xmm1, %xmm0
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_zero1:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX10_2-NEXT: vminmaxsd $1, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_zero1:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
@@ -354,6 +394,11 @@ define double @test_fmaximum_zero2(double %x, double %y) {
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_zero2:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_zero2:
; X86: # %bb.0:
; X86-NEXT: fldz
@@ -390,6 +435,11 @@ define float @test_fmaximum_nsz(float %x, float %y) "no-signed-zeros-fp-math"="t
; AVX512-NEXT: vmovaps %xmm1, %xmm0
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_nsz:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxss $1, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_nsz:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
@@ -474,6 +524,12 @@ define float @test_fmaximum_combine_cmps(float %x, float %y) nounwind {
; AVX512DQ-NEXT: vmaxss %xmm2, %xmm0, %xmm0
; AVX512DQ-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_combine_cmps:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vdivss %xmm0, %xmm1, %xmm1
+; AVX10_2-NEXT: vminmaxss $1, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_combine_cmps:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
@@ -562,6 +618,11 @@ define float @test_fminimum(float %x, float %y) nounwind {
; AVX512-NEXT: vmovaps %xmm1, %xmm0
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxss $0, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
@@ -599,6 +660,11 @@ define <2 x double> @test_fminimum_scalarize(<2 x double> %x, <2 x double> %y) "
; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_scalarize:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_scalarize:
; X86: # %bb.0:
; X86-NEXT: vminpd %xmm1, %xmm0, %xmm0
@@ -618,6 +684,11 @@ define float @test_fminimum_nan0(float %x, float %y) {
; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_nan0:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_nan0:
; X86: # %bb.0:
; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
@@ -637,6 +708,11 @@ define float @test_fminimum_nan1(float %x, float %y) {
; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_nan1:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_nan1:
; X86: # %bb.0:
; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
@@ -695,6 +771,11 @@ define double @test_fminimum_nnan(double %x, double %y) "no-nans-fp-math"="true"
; AVX512DQ-NEXT: vminsd %xmm2, %xmm1, %xmm0
; AVX512DQ-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_nnan:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxsd $0, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_nnan:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
@@ -749,6 +830,11 @@ define double @test_fminimum_zero0(double %x, double %y) nounwind {
; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_zero0:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxsd $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_zero0:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
@@ -796,6 +882,11 @@ define double @test_fminimum_zero1(double %x, double %y) nounwind {
; AVX512-NEXT: vmovapd %xmm1, %xmm0
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_zero1:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxsd $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_zero1:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
@@ -826,6 +917,11 @@ define double @test_fminimum_zero2(double %x, double %y) {
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0]
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_zero2:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vmovsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0]
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_zero2:
; X86: # %bb.0:
; X86-NEXT: fldz
@@ -863,6 +959,11 @@ define float @test_fminimum_nsz(float %x, float %y) nounwind {
; AVX512-NEXT: vmovaps %xmm1, %xmm0
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_nsz:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxss $0, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_nsz:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
@@ -948,6 +1049,12 @@ define float @test_fminimum_combine_cmps(float %x, float %y) nounwind {
; AVX512DQ-NEXT: vminss %xmm2, %xmm0, %xmm0
; AVX512DQ-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_combine_cmps:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vdivss %xmm0, %xmm1, %xmm1
+; AVX10_2-NEXT: vminmaxss $0, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_combine_cmps:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
@@ -1009,6 +1116,11 @@ define <2 x double> @test_fminimum_vector(<2 x double> %x, <2 x double> %y) {
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_vector:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_vector:
; X86: # %bb.0:
; X86-NEXT: vblendvpd %xmm0, %xmm0, %xmm1, %xmm2
@@ -1032,6 +1144,11 @@ define <4 x float> @test_fmaximum_vector(<4 x float> %x, <4 x float> %y) "no-nan
; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_vector:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxps $1, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_vector:
; X86: # %bb.0:
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0
@@ -1054,6 +1171,12 @@ define <2 x double> @test_fminimum_vector_zero(<2 x double> %x) {
; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_vector_zero:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_vector_zero:
; X86: # %bb.0:
; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1
@@ -1077,6 +1200,11 @@ define <4 x float> @test_fmaximum_vector_signed_zero(<4 x float> %x) {
; AVX-NEXT: vmaxps %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_vector_signed_zero:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxps $1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_vector_signed_zero:
; X86: # %bb.0:
; X86-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
@@ -1102,6 +1230,13 @@ define <2 x double> @test_fminimum_vector_partially_zero(<2 x double> %x) {
; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_vector_partially_zero:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX10_2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
+; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_vector_partially_zero:
; X86: # %bb.0:
; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1
@@ -1149,6 +1284,13 @@ define <2 x double> @test_fminimum_vector_different_zeros(<2 x double> %x) {
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_vector_different_zeros:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX10_2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
+; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_vector_different_zeros:
; X86: # %bb.0:
; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1
@@ -1177,6 +1319,11 @@ define <4 x float> @test_fmaximum_vector_non_zero(<4 x float> %x) {
; AVX-NEXT: vmaxps %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_vector_non_zero:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxps $1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_vector_non_zero:
; X86: # %bb.0:
; X86-NEXT: vmovaps {{.*#+}} xmm1 = [5.0E+0,4.0E+0,3.0E+0,2.0E+0]
@@ -1206,6 +1353,13 @@ define <2 x double> @test_fminimum_vector_nan(<2 x double> %x) {
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_vector_nan:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX10_2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
+; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_vector_nan:
; X86: # %bb.0:
; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1
@@ -1232,6 +1386,12 @@ define <2 x double> @test_fminimum_vector_zero_first(<2 x double> %x) {
; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_vector_zero_first:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_vector_zero_first:
; X86: # %bb.0:
; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1
@@ -1260,6 +1420,11 @@ define <2 x double> @test_fminimum_vector_signed_zero(<2 x double> %x) {
; AVX-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_vector_signed_zero:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxpd $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_vector_signed_zero:
; X86: # %bb.0:
; X86-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm1
@@ -1284,6 +1449,11 @@ define <4 x float> @test_fmaximum_vector_signed_zero_first(<4 x float> %x) {
; AVX-NEXT: vmaxps %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_vector_signed_zero_first:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxps $1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_vector_signed_zero_first:
; X86: # %bb.0:
; X86-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
@@ -1314,6 +1484,12 @@ define <4 x float> @test_fmaximum_vector_zero(<4 x float> %x) {
; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_vector_zero:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX10_2-NEXT: vminmaxps $1, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_vector_zero:
; X86: # %bb.0:
; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
@@ -1369,6 +1545,12 @@ define <4 x float> @test_fmaximum_v4f32_splat(<4 x float> %x, float %y) {
; AVX512-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_v4f32_splat:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vbroadcastss %xmm1, %xmm1
+; AVX10_2-NEXT: vminmaxps $1, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_v4f32_splat:
; X86: # %bb.0:
; X86-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm1
@@ -1803,6 +1985,11 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) nounwind {
; AVX512-NEXT: popq %rbp
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_v4f16:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxph $1, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_v4f16:
; X86: # %bb.0:
; X86-NEXT: subl $164, %esp
@@ -2330,6 +2517,11 @@ define <4 x bfloat> @test_fmaximum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) {
; AVX512-NEXT: .cfi_def_cfa_offset 8
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_v4bf16:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxnepbf16 $1, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_v4bf16:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index 36f6afacdf09d6..8cfaa18a5cfac0 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -3085,9 +3085,12 @@ static const X86FoldTableEntry Table2[] = {
{X86::VMINMAXPSZ128rri, X86::VMINMAXPSZ128rmi, 0},
{X86::VMINMAXPSZ256rri, X86::VMINMAXPSZ256rmi, 0},
{X86::VMINMAXPSZrri, X86::VMINMAXPSZrmi, 0},
- {X86::VMINMAXSDrri, X86::VMINMAXSDrmi, TB_NO_REVERSE},
- {X86::VMINMAXSHrri, X86::VMINMAXSHrmi, TB_NO_REVERSE},
- {X86::VMINMAXSSrri, X86::VMINMAXSSrmi, TB_NO_REVERSE},
+ {X86::VMINMAXSDrri, X86::VMINMAXSDrmi, 0},
+ {X86::VMINMAXSDrri_Int, X86::VMINMAXSDrmi_Int, TB_NO_REVERSE},
+ {X86::VMINMAXSHrri, X86::VMINMAXSHrmi, 0},
+ {X86::VMINMAXSHrri_Int, X86::VMINMAXSHrmi_Int, TB_NO_REVERSE},
+ {X86::VMINMAXSSrri, X86::VMINMAXSSrmi, 0},
+ {X86::VMINMAXSSrri_Int, X86::VMINMAXSSrmi_Int, TB_NO_REVERSE},
{X86::VMINPBF16Z128rr, X86::VMINPBF16Z128rm, 0},
{X86::VMINPBF16Z256rr, X86::VMINPBF16Z256rm, 0},
{X86::VMINPBF16Zrr, X86::VMINPBF16Zrm, 0},
@@ -5131,9 +5134,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VMINMAXPSZ128rrikz, X86::VMINMAXPSZ128rmikz, 0},
{X86::VMINMAXPSZ256rrikz, X86::VMINMAXPSZ256rmikz, 0},
{X86::VMINMAXPSZrrikz, X86::VMINMAXPSZrmikz, 0},
- {X86::VMINMAXSDrrikz, X86::VMINMAXSDrmikz, TB_NO_REVERSE},
- {X86::VMINMAXSHrrikz, X86::VMINMAXSHrmikz, TB_NO_REVERSE},
- {X86::VMINMAXSSrrikz, X86::VMINMAXSSrmikz, TB_NO_REVERSE},
+ {X86::VMINMAXSDrri_Intkz, X86::VMINMAXSDrmi_Intkz, TB_NO_REVERSE},
+ {X86::VMINMAXSHrri_Intkz, X86::VMINMAXSHrmi_Intkz, TB_NO_REVERSE},
+ {X86::VMINMAXSSrri_Intkz, X86::VMINMAXSSrmi_Intkz, TB_NO_REVERSE},
{X86::VMINPBF16Z128rrkz, X86::VMINPBF16Z128rmkz, 0},
{X86::VMINPBF16Z256rrkz, X86::VMINPBF16Z256rmkz, 0},
{X86::VMINPBF16Zrrkz, X86::VMINPBF16Zrmkz, 0},
@@ -6753,9 +6756,9 @@ static const X86FoldTableEntry Table4[] = {
{X86::VMINMAXPSZ128rrik, X86::VMINMAXPSZ128rmik, 0},
{X86::VMINMAXPSZ256rrik, X86::VMINMAXPSZ256rmik, 0},
{X86::VMINMAXPSZrrik, X86::VMINMAXPSZrmik, 0},
- {X86::VMINMAXSDrrik, X86::VMINMAXSDrmik, TB_NO_REVERSE},
- {X86::VMINMAXSHrrik, X86::VMINMAXSHrmik, TB_NO_REVERSE},
- {X86::VMINMAXSSrrik, X86::VMINMAXSSrmik, TB_NO_REVERSE},
+ {X86::VMINMAXSDrri_Intk, X86::VMINMAXSDrmi_Intk, TB_NO_REVERSE},
+ {X86::VMINMAXSHrri_Intk, X86::VMINMAXSHrmi_Intk, TB_NO_REVERSE},
+ {X86::VMINMAXSSrri_Intk, X86::VMINMAXSSrmi_Intk, TB_NO_REVERSE},
{X86::VMINPBF16Z128rrk, X86::VMINPBF16Z128rmk, 0},
{X86::VMINPBF16Z256rrk, X86::VMINPBF16Z256rmk, 0},
{X86::VMINPBF16Zrrk, X86::VMINPBF16Zrmk, 0},
More information about the llvm-commits
mailing list