[llvm] X86: Improve cost model of fp16 conversion (PR #113195)

via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 21 10:42:42 PDT 2024


github-actions[bot] wrote:

<!--LLVM CODE FORMAT COMMENT: {clang-format}-->


:warning: C/C++ code formatter, clang-format, found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
git-clang-format --diff 8ae39c8e34de2d24c46827b324c76bac845c18b0 4f2a5293ea966245788cc0bc2b430510f2edca13 --extensions cpp -- llvm/lib/Target/X86/X86ISelLowering.cpp llvm/lib/Target/X86/X86TargetTransformInfo.cpp
``````````

</details>

<details>
<summary>
View the diff from clang-format here.
</summary>

``````````diff
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 2d2c804ed4..82455506c0 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2293,143 +2293,221 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
   // 256-bit wide vectors.
 
   static const TypeConversionCostKindTblEntry AVX512FConversionTbl[] = {
-    { ISD::FP_EXTEND, MVT::v8f64,   MVT::v8f32,   { 1, 1, 1, 1 } },
-    { ISD::FP_EXTEND, MVT::v8f64,   MVT::v16f32,  { 3, 1, 1, 1 } },
-    { ISD::FP_EXTEND, MVT::v16f64,  MVT::v16f32,  { 4, 1, 1, 1 } }, // 2*vcvtps2pd+vextractf64x4
-    { ISD::FP_EXTEND, MVT::v16f32,  MVT::v16f16,  { 1, 1, 1, 1 } }, // vcvtph2ps
-    { ISD::FP_EXTEND, MVT::v8f64,   MVT::v8f16,   { 2, 1, 1, 1 } }, // vcvtph2ps+vcvtps2pd
-    { ISD::FP_ROUND,  MVT::v8f32,   MVT::v8f64,   { 1, 1, 1, 1 } },
-    { ISD::FP_ROUND,  MVT::v16f16,  MVT::v16f32,  { 1, 1, 1, 1 } }, // vcvtps2ph
-
-    { ISD::TRUNCATE,  MVT::v2i1,    MVT::v2i8,    { 3, 1, 1, 1 } }, // sext+vpslld+vptestmd
-    { ISD::TRUNCATE,  MVT::v4i1,    MVT::v4i8,    { 3, 1, 1, 1 } }, // sext+vpslld+vptestmd
-    { ISD::TRUNCATE,  MVT::v8i1,    MVT::v8i8,    { 3, 1, 1, 1 } }, // sext+vpslld+vptestmd
-    { ISD::TRUNCATE,  MVT::v16i1,   MVT::v16i8,   { 3, 1, 1, 1 } }, // sext+vpslld+vptestmd
-    { ISD::TRUNCATE,  MVT::v2i1,    MVT::v2i16,   { 3, 1, 1, 1 } }, // sext+vpsllq+vptestmq
-    { ISD::TRUNCATE,  MVT::v4i1,    MVT::v4i16,   { 3, 1, 1, 1 } }, // sext+vpsllq+vptestmq
-    { ISD::TRUNCATE,  MVT::v8i1,    MVT::v8i16,   { 3, 1, 1, 1 } }, // sext+vpsllq+vptestmq
-    { ISD::TRUNCATE,  MVT::v16i1,   MVT::v16i16,  { 3, 1, 1, 1 } }, // sext+vpslld+vptestmd
-    { ISD::TRUNCATE,  MVT::v2i1,    MVT::v2i32,   { 2, 1, 1, 1 } }, // zmm vpslld+vptestmd
-    { ISD::TRUNCATE,  MVT::v4i1,    MVT::v4i32,   { 2, 1, 1, 1 } }, // zmm vpslld+vptestmd
-    { ISD::TRUNCATE,  MVT::v8i1,    MVT::v8i32,   { 2, 1, 1, 1 } }, // zmm vpslld+vptestmd
-    { ISD::TRUNCATE,  MVT::v16i1,   MVT::v16i32,  { 2, 1, 1, 1 } }, // vpslld+vptestmd
-    { ISD::TRUNCATE,  MVT::v2i1,    MVT::v2i64,   { 2, 1, 1, 1 } }, // zmm vpsllq+vptestmq
-    { ISD::TRUNCATE,  MVT::v4i1,    MVT::v4i64,   { 2, 1, 1, 1 } }, // zmm vpsllq+vptestmq
-    { ISD::TRUNCATE,  MVT::v8i1,    MVT::v8i64,   { 2, 1, 1, 1 } }, // vpsllq+vptestmq
-    { ISD::TRUNCATE,  MVT::v2i8,    MVT::v2i32,   { 2, 1, 1, 1 } }, // vpmovdb
-    { ISD::TRUNCATE,  MVT::v4i8,    MVT::v4i32,   { 2, 1, 1, 1 } }, // vpmovdb
-    { ISD::TRUNCATE,  MVT::v16i8,   MVT::v16i32,  { 2, 1, 1, 1 } }, // vpmovdb
-    { ISD::TRUNCATE,  MVT::v32i8,   MVT::v16i32,  { 2, 1, 1, 1 } }, // vpmovdb
-    { ISD::TRUNCATE,  MVT::v64i8,   MVT::v16i32,  { 2, 1, 1, 1 } }, // vpmovdb
-    { ISD::TRUNCATE,  MVT::v16i16,  MVT::v16i32,  { 2, 1, 1, 1 } }, // vpmovdw
-    { ISD::TRUNCATE,  MVT::v32i16,  MVT::v16i32,  { 2, 1, 1, 1 } }, // vpmovdw
-    { ISD::TRUNCATE,  MVT::v2i8,    MVT::v2i64,   { 2, 1, 1, 1 } }, // vpmovqb
-    { ISD::TRUNCATE,  MVT::v2i16,   MVT::v2i64,   { 1, 1, 1, 1 } }, // vpshufb
-    { ISD::TRUNCATE,  MVT::v8i8,    MVT::v8i64,   { 2, 1, 1, 1 } }, // vpmovqb
-    { ISD::TRUNCATE,  MVT::v16i8,   MVT::v8i64,   { 2, 1, 1, 1 } }, // vpmovqb
-    { ISD::TRUNCATE,  MVT::v32i8,   MVT::v8i64,   { 2, 1, 1, 1 } }, // vpmovqb
-    { ISD::TRUNCATE,  MVT::v64i8,   MVT::v8i64,   { 2, 1, 1, 1 } }, // vpmovqb
-    { ISD::TRUNCATE,  MVT::v8i16,   MVT::v8i64,   { 2, 1, 1, 1 } }, // vpmovqw
-    { ISD::TRUNCATE,  MVT::v16i16,  MVT::v8i64,   { 2, 1, 1, 1 } }, // vpmovqw
-    { ISD::TRUNCATE,  MVT::v32i16,  MVT::v8i64,   { 2, 1, 1, 1 } }, // vpmovqw
-    { ISD::TRUNCATE,  MVT::v8i32,   MVT::v8i64,   { 1, 1, 1, 1 } }, // vpmovqd
-    { ISD::TRUNCATE,  MVT::v4i32,   MVT::v4i64,   { 1, 1, 1, 1 } }, // zmm vpmovqd
-    { ISD::TRUNCATE,  MVT::v16i8,   MVT::v16i64,  { 5, 1, 1, 1 } },// 2*vpmovqd+concat+vpmovdb
-
-    { ISD::TRUNCATE,  MVT::v16i8,  MVT::v16i16,   { 3, 1, 1, 1 } }, // extend to v16i32
-    { ISD::TRUNCATE,  MVT::v32i8,  MVT::v32i16,   { 8, 1, 1, 1 } },
-    { ISD::TRUNCATE,  MVT::v64i8,  MVT::v32i16,   { 8, 1, 1, 1 } },
-
-    // Sign extend is zmm vpternlogd+vptruncdb.
-    // Zero extend is zmm broadcast load+vptruncdw.
-    { ISD::SIGN_EXTEND, MVT::v2i8,   MVT::v2i1,   { 3, 1, 1, 1 } },
-    { ISD::ZERO_EXTEND, MVT::v2i8,   MVT::v2i1,   { 4, 1, 1, 1 } },
-    { ISD::SIGN_EXTEND, MVT::v4i8,   MVT::v4i1,   { 3, 1, 1, 1 } },
-    { ISD::ZERO_EXTEND, MVT::v4i8,   MVT::v4i1,   { 4, 1, 1, 1 } },
-    { ISD::SIGN_EXTEND, MVT::v8i8,   MVT::v8i1,   { 3, 1, 1, 1 } },
-    { ISD::ZERO_EXTEND, MVT::v8i8,   MVT::v8i1,   { 4, 1, 1, 1 } },
-    { ISD::SIGN_EXTEND, MVT::v16i8,  MVT::v16i1,  { 3, 1, 1, 1 } },
-    { ISD::ZERO_EXTEND, MVT::v16i8,  MVT::v16i1,  { 4, 1, 1, 1 } },
-
-    // Sign extend is zmm vpternlogd+vptruncdw.
-    // Zero extend is zmm vpternlogd+vptruncdw+vpsrlw.
-    { ISD::SIGN_EXTEND, MVT::v2i16,  MVT::v2i1,   { 3, 1, 1, 1 } },
-    { ISD::ZERO_EXTEND, MVT::v2i16,  MVT::v2i1,   { 4, 1, 1, 1 } },
-    { ISD::SIGN_EXTEND, MVT::v4i16,  MVT::v4i1,   { 3, 1, 1, 1 } },
-    { ISD::ZERO_EXTEND, MVT::v4i16,  MVT::v4i1,   { 4, 1, 1, 1 } },
-    { ISD::SIGN_EXTEND, MVT::v8i16,  MVT::v8i1,   { 3, 1, 1, 1 } },
-    { ISD::ZERO_EXTEND, MVT::v8i16,  MVT::v8i1,   { 4, 1, 1, 1 } },
-    { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1,  { 3, 1, 1, 1 } },
-    { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1,  { 4, 1, 1, 1 } },
-
-    { ISD::SIGN_EXTEND, MVT::v2i32,  MVT::v2i1,   { 1, 1, 1, 1 } }, // zmm vpternlogd
-    { ISD::ZERO_EXTEND, MVT::v2i32,  MVT::v2i1,   { 2, 1, 1, 1 } }, // zmm vpternlogd+psrld
-    { ISD::SIGN_EXTEND, MVT::v4i32,  MVT::v4i1,   { 1, 1, 1, 1 } }, // zmm vpternlogd
-    { ISD::ZERO_EXTEND, MVT::v4i32,  MVT::v4i1,   { 2, 1, 1, 1 } }, // zmm vpternlogd+psrld
-    { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i1,   { 1, 1, 1, 1 } }, // zmm vpternlogd
-    { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i1,   { 2, 1, 1, 1 } }, // zmm vpternlogd+psrld
-    { ISD::SIGN_EXTEND, MVT::v2i64,  MVT::v2i1,   { 1, 1, 1, 1 } }, // zmm vpternlogq
-    { ISD::ZERO_EXTEND, MVT::v2i64,  MVT::v2i1,   { 2, 1, 1, 1 } }, // zmm vpternlogq+psrlq
-    { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i1,   { 1, 1, 1, 1 } }, // zmm vpternlogq
-    { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i1,   { 2, 1, 1, 1 } }, // zmm vpternlogq+psrlq
-
-    { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1,  { 1, 1, 1, 1 } }, // vpternlogd
-    { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1,  { 2, 1, 1, 1 } }, // vpternlogd+psrld
-    { ISD::SIGN_EXTEND, MVT::v8i64,  MVT::v8i1,   { 1, 1, 1, 1 } }, // vpternlogq
-    { ISD::ZERO_EXTEND, MVT::v8i64,  MVT::v8i1,   { 2, 1, 1, 1 } }, // vpternlogq+psrlq
-
-    { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8,  { 1, 1, 1, 1 } },
-    { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8,  { 1, 1, 1, 1 } },
-    { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, { 1, 1, 1, 1 } },
-    { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, { 1, 1, 1, 1 } },
-    { ISD::SIGN_EXTEND, MVT::v8i64,  MVT::v8i8,   { 1, 1, 1, 1 } },
-    { ISD::ZERO_EXTEND, MVT::v8i64,  MVT::v8i8,   { 1, 1, 1, 1 } },
-    { ISD::SIGN_EXTEND, MVT::v8i64,  MVT::v8i16,  { 1, 1, 1, 1 } },
-    { ISD::ZERO_EXTEND, MVT::v8i64,  MVT::v8i16,  { 1, 1, 1, 1 } },
-    { ISD::SIGN_EXTEND, MVT::v8i64,  MVT::v8i32,  { 1, 1, 1, 1 } },
-    { ISD::ZERO_EXTEND, MVT::v8i64,  MVT::v8i32,  { 1, 1, 1, 1 } },
-
-    { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i8,  { 3, 1, 1, 1 } }, // FIXME: May not be right
-    { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i8,  { 3, 1, 1, 1 } }, // FIXME: May not be right
-
-    { ISD::SINT_TO_FP,  MVT::v8f64,  MVT::v8i1,   { 4, 1, 1, 1 } },
-    { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i1,  { 3, 1, 1, 1 } },
-    { ISD::SINT_TO_FP,  MVT::v8f64,  MVT::v16i8,  { 2, 1, 1, 1 } },
-    { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i8,  { 1, 1, 1, 1 } },
-    { ISD::SINT_TO_FP,  MVT::v8f64,  MVT::v8i16,  { 2, 1, 1, 1 } },
-    { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i16, { 1, 1, 1, 1 } },
-    { ISD::SINT_TO_FP,  MVT::v8f64,  MVT::v8i32,  { 1, 1, 1, 1 } },
-    { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i32, { 1, 1, 1, 1 } },
-
-    { ISD::UINT_TO_FP,  MVT::v8f64,  MVT::v8i1,   { 4, 1, 1, 1 } },
-    { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i1,  { 3, 1, 1, 1 } },
-    { ISD::UINT_TO_FP,  MVT::v8f64,  MVT::v16i8,  { 2, 1, 1, 1 } },
-    { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i8,  { 1, 1, 1, 1 } },
-    { ISD::UINT_TO_FP,  MVT::v8f64,  MVT::v8i16,  { 2, 1, 1, 1 } },
-    { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i16, { 1, 1, 1, 1 } },
-    { ISD::UINT_TO_FP,  MVT::v8f64,  MVT::v8i32,  { 1, 1, 1, 1 } },
-    { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i32, { 1, 1, 1, 1 } },
-    { ISD::UINT_TO_FP,  MVT::v8f32,  MVT::v8i64,  {26, 1, 1, 1 } },
-    { ISD::UINT_TO_FP,  MVT::v8f64,  MVT::v8i64,  { 5, 1, 1, 1 } },
-
-    { ISD::FP_TO_SINT,  MVT::v16i8,  MVT::v16f32, { 2, 1, 1, 1 } },
-    { ISD::FP_TO_SINT,  MVT::v16i8,  MVT::v16f64, { 7, 1, 1, 1 } },
-    { ISD::FP_TO_SINT,  MVT::v32i8,  MVT::v32f64, {15, 1, 1, 1 } },
-    { ISD::FP_TO_SINT,  MVT::v64i8,  MVT::v64f32, {11, 1, 1, 1 } },
-    { ISD::FP_TO_SINT,  MVT::v64i8,  MVT::v64f64, {31, 1, 1, 1 } },
-    { ISD::FP_TO_SINT,  MVT::v8i16,  MVT::v8f64,  { 3, 1, 1, 1 } },
-    { ISD::FP_TO_SINT,  MVT::v16i16, MVT::v16f64, { 7, 1, 1, 1 } },
-    { ISD::FP_TO_SINT,  MVT::v32i16, MVT::v32f32, { 5, 1, 1, 1 } },
-    { ISD::FP_TO_SINT,  MVT::v32i16, MVT::v32f64, {15, 1, 1, 1 } },
-    { ISD::FP_TO_SINT,  MVT::v8i32,  MVT::v8f64,  { 1, 1, 1, 1 } },
-    { ISD::FP_TO_SINT,  MVT::v16i32, MVT::v16f64, { 3, 1, 1, 1 } },
-
-    { ISD::FP_TO_UINT,  MVT::v8i32,  MVT::v8f64,  { 1, 1, 1, 1 } },
-    { ISD::FP_TO_UINT,  MVT::v8i16,  MVT::v8f64,  { 3, 1, 1, 1 } },
-    { ISD::FP_TO_UINT,  MVT::v8i8,   MVT::v8f64,  { 3, 1, 1, 1 } },
-    { ISD::FP_TO_UINT,  MVT::v16i32, MVT::v16f32, { 1, 1, 1, 1 } },
-    { ISD::FP_TO_UINT,  MVT::v16i16, MVT::v16f32, { 3, 1, 1, 1 } },
-    { ISD::FP_TO_UINT,  MVT::v16i8,  MVT::v16f32, { 3, 1, 1, 1 } },
+      {ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, {1, 1, 1, 1}},
+      {ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, {3, 1, 1, 1}},
+      {ISD::FP_EXTEND,
+       MVT::v16f64,
+       MVT::v16f32,
+       {4, 1, 1, 1}}, // 2*vcvtps2pd+vextractf64x4
+      {ISD::FP_EXTEND, MVT::v16f32, MVT::v16f16, {1, 1, 1, 1}}, // vcvtph2ps
+      {ISD::FP_EXTEND,
+       MVT::v8f64,
+       MVT::v8f16,
+       {2, 1, 1, 1}}, // vcvtph2ps+vcvtps2pd
+      {ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, {1, 1, 1, 1}},
+      {ISD::FP_ROUND, MVT::v16f16, MVT::v16f32, {1, 1, 1, 1}}, // vcvtps2ph
+
+      {ISD::TRUNCATE,
+       MVT::v2i1,
+       MVT::v2i8,
+       {3, 1, 1, 1}}, // sext+vpslld+vptestmd
+      {ISD::TRUNCATE,
+       MVT::v4i1,
+       MVT::v4i8,
+       {3, 1, 1, 1}}, // sext+vpslld+vptestmd
+      {ISD::TRUNCATE,
+       MVT::v8i1,
+       MVT::v8i8,
+       {3, 1, 1, 1}}, // sext+vpslld+vptestmd
+      {ISD::TRUNCATE,
+       MVT::v16i1,
+       MVT::v16i8,
+       {3, 1, 1, 1}}, // sext+vpslld+vptestmd
+      {ISD::TRUNCATE,
+       MVT::v2i1,
+       MVT::v2i16,
+       {3, 1, 1, 1}}, // sext+vpsllq+vptestmq
+      {ISD::TRUNCATE,
+       MVT::v4i1,
+       MVT::v4i16,
+       {3, 1, 1, 1}}, // sext+vpsllq+vptestmq
+      {ISD::TRUNCATE,
+       MVT::v8i1,
+       MVT::v8i16,
+       {3, 1, 1, 1}}, // sext+vpsllq+vptestmq
+      {ISD::TRUNCATE,
+       MVT::v16i1,
+       MVT::v16i16,
+       {3, 1, 1, 1}}, // sext+vpslld+vptestmd
+      {ISD::TRUNCATE,
+       MVT::v2i1,
+       MVT::v2i32,
+       {2, 1, 1, 1}}, // zmm vpslld+vptestmd
+      {ISD::TRUNCATE,
+       MVT::v4i1,
+       MVT::v4i32,
+       {2, 1, 1, 1}}, // zmm vpslld+vptestmd
+      {ISD::TRUNCATE,
+       MVT::v8i1,
+       MVT::v8i32,
+       {2, 1, 1, 1}}, // zmm vpslld+vptestmd
+      {ISD::TRUNCATE, MVT::v16i1, MVT::v16i32, {2, 1, 1, 1}}, // vpslld+vptestmd
+      {ISD::TRUNCATE,
+       MVT::v2i1,
+       MVT::v2i64,
+       {2, 1, 1, 1}}, // zmm vpsllq+vptestmq
+      {ISD::TRUNCATE,
+       MVT::v4i1,
+       MVT::v4i64,
+       {2, 1, 1, 1}}, // zmm vpsllq+vptestmq
+      {ISD::TRUNCATE, MVT::v8i1, MVT::v8i64, {2, 1, 1, 1}},   // vpsllq+vptestmq
+      {ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, {2, 1, 1, 1}},   // vpmovdb
+      {ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, {2, 1, 1, 1}},   // vpmovdb
+      {ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, {2, 1, 1, 1}}, // vpmovdb
+      {ISD::TRUNCATE, MVT::v32i8, MVT::v16i32, {2, 1, 1, 1}}, // vpmovdb
+      {ISD::TRUNCATE, MVT::v64i8, MVT::v16i32, {2, 1, 1, 1}}, // vpmovdb
+      {ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, {2, 1, 1, 1}}, // vpmovdw
+      {ISD::TRUNCATE, MVT::v32i16, MVT::v16i32, {2, 1, 1, 1}}, // vpmovdw
+      {ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, {2, 1, 1, 1}},    // vpmovqb
+      {ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, {1, 1, 1, 1}},   // vpshufb
+      {ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, {2, 1, 1, 1}},    // vpmovqb
+      {ISD::TRUNCATE, MVT::v16i8, MVT::v8i64, {2, 1, 1, 1}},   // vpmovqb
+      {ISD::TRUNCATE, MVT::v32i8, MVT::v8i64, {2, 1, 1, 1}},   // vpmovqb
+      {ISD::TRUNCATE, MVT::v64i8, MVT::v8i64, {2, 1, 1, 1}},   // vpmovqb
+      {ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, {2, 1, 1, 1}},   // vpmovqw
+      {ISD::TRUNCATE, MVT::v16i16, MVT::v8i64, {2, 1, 1, 1}},  // vpmovqw
+      {ISD::TRUNCATE, MVT::v32i16, MVT::v8i64, {2, 1, 1, 1}},  // vpmovqw
+      {ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, {1, 1, 1, 1}},   // vpmovqd
+      {ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, {1, 1, 1, 1}},   // zmm vpmovqd
+      {ISD::TRUNCATE,
+       MVT::v16i8,
+       MVT::v16i64,
+       {5, 1, 1, 1}}, // 2*vpmovqd+concat+vpmovdb
+
+      {ISD::TRUNCATE,
+       MVT::v16i8,
+       MVT::v16i16,
+       {3, 1, 1, 1}}, // extend to v16i32
+      {ISD::TRUNCATE, MVT::v32i8, MVT::v32i16, {8, 1, 1, 1}},
+      {ISD::TRUNCATE, MVT::v64i8, MVT::v32i16, {8, 1, 1, 1}},
+
+      // Sign extend is zmm vpternlogd+vptruncdb.
+      // Zero extend is zmm broadcast load+vptruncdw.
+      {ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, {3, 1, 1, 1}},
+      {ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, {4, 1, 1, 1}},
+      {ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, {3, 1, 1, 1}},
+      {ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, {4, 1, 1, 1}},
+      {ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, {3, 1, 1, 1}},
+      {ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, {4, 1, 1, 1}},
+      {ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, {3, 1, 1, 1}},
+      {ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, {4, 1, 1, 1}},
+
+      // Sign extend is zmm vpternlogd+vptruncdw.
+      // Zero extend is zmm vpternlogd+vptruncdw+vpsrlw.
+      {ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, {3, 1, 1, 1}},
+      {ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, {4, 1, 1, 1}},
+      {ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, {3, 1, 1, 1}},
+      {ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, {4, 1, 1, 1}},
+      {ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, {3, 1, 1, 1}},
+      {ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, {4, 1, 1, 1}},
+      {ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, {3, 1, 1, 1}},
+      {ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, {4, 1, 1, 1}},
+
+      {ISD::SIGN_EXTEND, MVT::v2i32, MVT::v2i1, {1, 1, 1, 1}}, // zmm vpternlogd
+      {ISD::ZERO_EXTEND,
+       MVT::v2i32,
+       MVT::v2i1,
+       {2, 1, 1, 1}}, // zmm vpternlogd+psrld
+      {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i1, {1, 1, 1, 1}}, // zmm vpternlogd
+      {ISD::ZERO_EXTEND,
+       MVT::v4i32,
+       MVT::v4i1,
+       {2, 1, 1, 1}}, // zmm vpternlogd+psrld
+      {ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, {1, 1, 1, 1}}, // zmm vpternlogd
+      {ISD::ZERO_EXTEND,
+       MVT::v8i32,
+       MVT::v8i1,
+       {2, 1, 1, 1}}, // zmm vpternlogd+psrld
+      {ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i1, {1, 1, 1, 1}}, // zmm vpternlogq
+      {ISD::ZERO_EXTEND,
+       MVT::v2i64,
+       MVT::v2i1,
+       {2, 1, 1, 1}}, // zmm vpternlogq+psrlq
+      {ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, {1, 1, 1, 1}}, // zmm vpternlogq
+      {ISD::ZERO_EXTEND,
+       MVT::v4i64,
+       MVT::v4i1,
+       {2, 1, 1, 1}}, // zmm vpternlogq+psrlq
+
+      {ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, {1, 1, 1, 1}}, // vpternlogd
+      {ISD::ZERO_EXTEND,
+       MVT::v16i32,
+       MVT::v16i1,
+       {2, 1, 1, 1}}, // vpternlogd+psrld
+      {ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i1, {1, 1, 1, 1}}, // vpternlogq
+      {ISD::ZERO_EXTEND,
+       MVT::v8i64,
+       MVT::v8i1,
+       {2, 1, 1, 1}}, // vpternlogq+psrlq
+
+      {ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, {1, 1, 1, 1}},
+      {ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, {1, 1, 1, 1}},
+      {ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, {1, 1, 1, 1}},
+      {ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, {1, 1, 1, 1}},
+      {ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, {1, 1, 1, 1}},
+      {ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, {1, 1, 1, 1}},
+      {ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, {1, 1, 1, 1}},
+      {ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, {1, 1, 1, 1}},
+      {ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, {1, 1, 1, 1}},
+      {ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, {1, 1, 1, 1}},
+
+      {ISD::SIGN_EXTEND,
+       MVT::v32i16,
+       MVT::v32i8,
+       {3, 1, 1, 1}}, // FIXME: May not be right
+      {ISD::ZERO_EXTEND,
+       MVT::v32i16,
+       MVT::v32i8,
+       {3, 1, 1, 1}}, // FIXME: May not be right
+
+      {ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, {4, 1, 1, 1}},
+      {ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i1, {3, 1, 1, 1}},
+      {ISD::SINT_TO_FP, MVT::v8f64, MVT::v16i8, {2, 1, 1, 1}},
+      {ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, {1, 1, 1, 1}},
+      {ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i16, {2, 1, 1, 1}},
+      {ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, {1, 1, 1, 1}},
+      {ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, {1, 1, 1, 1}},
+      {ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, {1, 1, 1, 1}},
+
+      {ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i1, {4, 1, 1, 1}},
+      {ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i1, {3, 1, 1, 1}},
+      {ISD::UINT_TO_FP, MVT::v8f64, MVT::v16i8, {2, 1, 1, 1}},
+      {ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, {1, 1, 1, 1}},
+      {ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i16, {2, 1, 1, 1}},
+      {ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, {1, 1, 1, 1}},
+      {ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, {1, 1, 1, 1}},
+      {ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, {1, 1, 1, 1}},
+      {ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, {26, 1, 1, 1}},
+      {ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, {5, 1, 1, 1}},
+
+      {ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f32, {2, 1, 1, 1}},
+      {ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f64, {7, 1, 1, 1}},
+      {ISD::FP_TO_SINT, MVT::v32i8, MVT::v32f64, {15, 1, 1, 1}},
+      {ISD::FP_TO_SINT, MVT::v64i8, MVT::v64f32, {11, 1, 1, 1}},
+      {ISD::FP_TO_SINT, MVT::v64i8, MVT::v64f64, {31, 1, 1, 1}},
+      {ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f64, {3, 1, 1, 1}},
+      {ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f64, {7, 1, 1, 1}},
+      {ISD::FP_TO_SINT, MVT::v32i16, MVT::v32f32, {5, 1, 1, 1}},
+      {ISD::FP_TO_SINT, MVT::v32i16, MVT::v32f64, {15, 1, 1, 1}},
+      {ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f64, {1, 1, 1, 1}},
+      {ISD::FP_TO_SINT, MVT::v16i32, MVT::v16f64, {3, 1, 1, 1}},
+
+      {ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, {1, 1, 1, 1}},
+      {ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f64, {3, 1, 1, 1}},
+      {ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f64, {3, 1, 1, 1}},
+      {ISD::FP_TO_UINT, MVT::v16i32, MVT::v16f32, {1, 1, 1, 1}},
+      {ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, {3, 1, 1, 1}},
+      {ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f32, {3, 1, 1, 1}},
   };
 
   static const TypeConversionCostKindTblEntry AVX512BWVLConversionTbl[] {
@@ -2977,11 +3055,14 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
   };
 
   static const TypeConversionCostKindTblEntry F16ConversionTbl[] = {
-    { ISD::FP_ROUND,  MVT::v8f16,   MVT::v8f32,   { 1, 1, 1, 1 } }, // vcvtps2ph
-    { ISD::FP_ROUND,  MVT::v4f16,   MVT::v4f32,   { 1, 1, 1, 1 } }, // vcvtps2ph
-    { ISD::FP_EXTEND, MVT::v8f32,   MVT::v8f16,   { 1, 1, 1, 1 } }, // vcvtph2ps
-    { ISD::FP_EXTEND, MVT::v4f32,   MVT::v4f16,   { 1, 1, 1, 1 } }, // vcvtph2ps
-    { ISD::FP_EXTEND, MVT::v4f64,   MVT::v4f16,   { 2, 1, 1, 1 } }, // vcvtph2ps+vcvtps2pd
+      {ISD::FP_ROUND, MVT::v8f16, MVT::v8f32, {1, 1, 1, 1}},  // vcvtps2ph
+      {ISD::FP_ROUND, MVT::v4f16, MVT::v4f32, {1, 1, 1, 1}},  // vcvtps2ph
+      {ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, {1, 1, 1, 1}}, // vcvtph2ps
+      {ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, {1, 1, 1, 1}}, // vcvtph2ps
+      {ISD::FP_EXTEND,
+       MVT::v4f64,
+       MVT::v4f16,
+       {2, 1, 1, 1}}, // vcvtph2ps+vcvtps2pd
   };
 
   // Attempt to map directly to (simple) MVT types to let us match custom entries.

``````````

</details>


https://github.com/llvm/llvm-project/pull/113195


More information about the llvm-commits mailing list