[llvm] [X86][FP16][BF16] Improve vectorization of fcmp (PR #116153)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 13 19:08:18 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-analysis
Author: Phoebe Wang (phoebewang)
<details>
<summary>Changes</summary>
---
Patch is 63.13 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116153.diff
10 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+40-12)
- (modified) llvm/test/Analysis/CostModel/X86/fptoi_sat.ll (+40-40)
- (modified) llvm/test/CodeGen/X86/avx512-insert-extract.ll (+11-50)
- (modified) llvm/test/CodeGen/X86/avx512-vec-cmp.ll (+20-64)
- (modified) llvm/test/CodeGen/X86/fminimum-fmaximum.ll (+20-182)
- (modified) llvm/test/CodeGen/X86/half.ll (+8-8)
- (modified) llvm/test/CodeGen/X86/pr114520.ll (+2-77)
- (modified) llvm/test/CodeGen/X86/pr57340.ll (+6-229)
- (modified) llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll (+7-12)
- (modified) llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll (+7-12)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index db04f3a48d4d03..cd107220fdf233 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1711,6 +1711,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationPromotedToType(Opc, MVT::v8f16, MVT::v8f32);
setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32);
}
+ setOperationAction(ISD::SETCC, MVT::v8f16, Custom);
+ setOperationAction(ISD::SETCC, MVT::v16f16, Custom);
}
// This block controls legalization of the mask vector sizes that are
@@ -2046,6 +2048,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Custom);
for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV})
setOperationPromotedToType(Opc, MVT::v32f16, MVT::v32f32);
+ setOperationAction(ISD::SETCC, MVT::v32f16, Custom);
for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::MLOAD, VT, Legal);
@@ -2401,6 +2404,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationPromotedToType(Opc, MVT::v8bf16, MVT::v8f32);
setOperationPromotedToType(Opc, MVT::v16bf16, MVT::v16f32);
}
+ setOperationAction(ISD::SETCC, MVT::v8bf16, Custom);
+ setOperationAction(ISD::SETCC, MVT::v16bf16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v8bf16, Custom);
addLegalFPImmediate(APFloat::getZero(APFloat::BFloat()));
}
@@ -2411,6 +2416,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setF16Action(MVT::v32bf16, Expand);
for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV})
setOperationPromotedToType(Opc, MVT::v32bf16, MVT::v32f32);
+ setOperationAction(ISD::SETCC, MVT::v32bf16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v32bf16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v16bf16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32bf16, Custom);
@@ -23333,12 +23339,8 @@ static unsigned translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
/// Break a VSETCC 256-bit integer VSETCC into two new 128 ones and then
/// concatenate the result back.
-static SDValue splitIntVSETCC(EVT VT, SDValue LHS, SDValue RHS,
- ISD::CondCode Cond, SelectionDAG &DAG,
- const SDLoc &dl) {
- assert(VT.isInteger() && VT == LHS.getValueType() &&
- VT == RHS.getValueType() && "Unsupported VTs!");
-
+static SDValue splitVSETCC(EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond,
+ SelectionDAG &DAG, const SDLoc &dl) {
SDValue CC = DAG.getCondCode(Cond);
// Extract the LHS Lo/Hi vectors
@@ -23483,14 +23485,40 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
SDLoc dl(Op);
if (isFP) {
- MVT EltVT = Op0.getSimpleValueType().getVectorElementType();
+ MVT SVT = Op0.getSimpleValueType();
+ MVT EltVT = SVT.getVectorElementType();
assert(EltVT == MVT::bf16 || EltVT == MVT::f16 || EltVT == MVT::f32 ||
EltVT == MVT::f64);
- if (isSoftF16(EltVT, Subtarget))
- return SDValue();
- bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
+ if (isSoftF16(EltVT, Subtarget)) {
+ // Break 256-bit FP vector compare into smaller ones.
+ if (SVT.is256BitVector() && !Subtarget.useAVX512Regs())
+ return splitVSETCC(VT, Op0, Op1, Cond, DAG, dl);
+
+ // Break 512-bit FP vector compare into smaller ones.
+ if (SVT.is512BitVector())
+ return splitVSETCC(VT, Op0, Op1, Cond, DAG, dl);
+
+ MVT NVT = SVT.changeVectorElementType(MVT::f32);
+ if (IsStrict) {
+ Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Chain, Op0});
+ Op1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Chain, Op1});
+ return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other},
+ {Chain, Op0, Op1, CC});
+ }
+ MVT DVT = VT.getVectorElementType() == MVT::i16
+ ? VT.changeVectorElementType(MVT::i32)
+ : VT;
+ SDValue Cmp = DAG.getNode(Op.getOpcode(), dl, DVT,
+ DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op0),
+ DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op1), CC);
+ return DVT == VT ? Cmp : DAG.getNode(ISD::TRUNCATE, dl, VT, Cmp);
+ }
+
+ bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
// If we have a strict compare with a vXi1 result and the input is 128/256
// bits we can't use a masked compare unless we have VLX. If we use a wider
@@ -23701,12 +23729,12 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// Break 256-bit integer vector compare into smaller ones.
if (VT.is256BitVector() && !Subtarget.hasInt256())
- return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl);
+ return splitVSETCC(VT, Op0, Op1, Cond, DAG, dl);
// Break 512-bit integer vector compare into smaller ones.
// TODO: Try harder to use VPCMPx + VPMOV2x?
if (VT.is512BitVector())
- return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl);
+ return splitVSETCC(VT, Op0, Op1, Cond, DAG, dl);
// If we have a limit constant, try to form PCMPGT (signed cmp) to avoid
// not-of-PCMPEQ:
diff --git a/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll b/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll
index 55b80350f595ea..41bf88b1ec316b 100644
--- a/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll
+++ b/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll
@@ -1016,45 +1016,45 @@ define void @fp16() {
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 183 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
@@ -1069,45 +1069,45 @@ define void @fp16() {
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef)
-; AVX512DQ-NEXT: Cost Model: Found...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/116153
More information about the llvm-commits mailing list