[llvm] [CostModel][X86] Update SK_Broadcast based on cost kinds (PR #150620)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 25 06:42:53 PDT 2025
github-actions[bot] wrote:
<!--LLVM CODE FORMAT COMMENT: {clang-format}-->
:warning: C/C++ code formatter, clang-format found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff HEAD~1 HEAD --extensions cpp -- llvm/lib/Target/X86/X86TargetTransformInfo.cpp
``````````
</details>
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 6d2c7b4eb..ef0123d24 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1838,33 +1838,33 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
return LT.first * *KindCost;
static const CostKindTblEntry AVX512BWShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v32i16, { 1, 3, 1, 1 } }, // vpbroadcastw
- { TTI::SK_Broadcast, MVT::v32f16, { 1, 3, 1, 1 } }, // vpbroadcastw
- { TTI::SK_Broadcast, MVT::v64i8, { 1, 3, 1, 1 } }, // vpbroadcastb
-
- { TTI::SK_Reverse, MVT::v32i16, { 2, 2, 2, 2 } }, // vpermw
- { TTI::SK_Reverse, MVT::v32f16, { 2, 2, 2, 2 } }, // vpermw
- { TTI::SK_Reverse, MVT::v16i16, { 2, 2, 2, 2 } }, // vpermw
- { TTI::SK_Reverse, MVT::v64i8, { 2, 2, 2, 2 } }, // pshufb + vshufi64x2
-
- { TTI::SK_PermuteSingleSrc, MVT::v32i16, { 2, 2, 2, 2 } }, // vpermw
- { TTI::SK_PermuteSingleSrc, MVT::v32f16, { 2, 2, 2, 2 } }, // vpermw
- { TTI::SK_PermuteSingleSrc, MVT::v16i16, { 2, 2, 2, 2 } }, // vpermw
- { TTI::SK_PermuteSingleSrc, MVT::v16f16, { 2, 2, 2, 2 } }, // vpermw
- { TTI::SK_PermuteSingleSrc, MVT::v64i8, { 8, 8, 8, 8 } }, // extend to v32i16
-
- { TTI::SK_PermuteTwoSrc, MVT::v32i16,{ 2, 2, 2, 2 } }, // vpermt2w
- { TTI::SK_PermuteTwoSrc, MVT::v32f16,{ 2, 2, 2, 2 } }, // vpermt2w
- { TTI::SK_PermuteTwoSrc, MVT::v16i16,{ 2, 2, 2, 2 } }, // vpermt2w
- { TTI::SK_PermuteTwoSrc, MVT::v8i16, { 2, 2, 2, 2 } }, // vpermt2w
- { TTI::SK_PermuteTwoSrc, MVT::v64i8, { 19, 19, 19, 19 } }, // 6 * v32i8 + 1
-
- { TTI::SK_Select, MVT::v32i16, { 1, 1, 1, 1 } }, // vblendmw
- { TTI::SK_Select, MVT::v64i8, { 1, 1, 1, 1 } }, // vblendmb
-
- { TTI::SK_Splice, MVT::v32i16, { 2, 2, 2, 2 } }, // vshufi64x2 + palignr
- { TTI::SK_Splice, MVT::v32f16, { 2, 2, 2, 2 } }, // vshufi64x2 + palignr
- { TTI::SK_Splice, MVT::v64i8, { 2, 2, 2, 2 } }, // vshufi64x2 + palignr
+ {TTI::SK_Broadcast, MVT::v32i16, {1, 3, 1, 1}}, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v32f16, {1, 3, 1, 1}}, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v64i8, {1, 3, 1, 1}}, // vpbroadcastb
+
+ {TTI::SK_Reverse, MVT::v32i16, {2, 2, 2, 2}}, // vpermw
+ {TTI::SK_Reverse, MVT::v32f16, {2, 2, 2, 2}}, // vpermw
+ {TTI::SK_Reverse, MVT::v16i16, {2, 2, 2, 2}}, // vpermw
+ {TTI::SK_Reverse, MVT::v64i8, {2, 2, 2, 2}}, // pshufb + vshufi64x2
+
+ {TTI::SK_PermuteSingleSrc, MVT::v32i16, {2, 2, 2, 2}}, // vpermw
+ {TTI::SK_PermuteSingleSrc, MVT::v32f16, {2, 2, 2, 2}}, // vpermw
+ {TTI::SK_PermuteSingleSrc, MVT::v16i16, {2, 2, 2, 2}}, // vpermw
+ {TTI::SK_PermuteSingleSrc, MVT::v16f16, {2, 2, 2, 2}}, // vpermw
+ {TTI::SK_PermuteSingleSrc, MVT::v64i8, {8, 8, 8, 8}}, // extend to v32i16
+
+ {TTI::SK_PermuteTwoSrc, MVT::v32i16, {2, 2, 2, 2}}, // vpermt2w
+ {TTI::SK_PermuteTwoSrc, MVT::v32f16, {2, 2, 2, 2}}, // vpermt2w
+ {TTI::SK_PermuteTwoSrc, MVT::v16i16, {2, 2, 2, 2}}, // vpermt2w
+ {TTI::SK_PermuteTwoSrc, MVT::v8i16, {2, 2, 2, 2}}, // vpermt2w
+ {TTI::SK_PermuteTwoSrc, MVT::v64i8, {19, 19, 19, 19}}, // 6 * v32i8 + 1
+
+ {TTI::SK_Select, MVT::v32i16, {1, 1, 1, 1}}, // vblendmw
+ {TTI::SK_Select, MVT::v64i8, {1, 1, 1, 1}}, // vblendmb
+
+ {TTI::SK_Splice, MVT::v32i16, {2, 2, 2, 2}}, // vshufi64x2 + palignr
+ {TTI::SK_Splice, MVT::v32f16, {2, 2, 2, 2}}, // vshufi64x2 + palignr
+ {TTI::SK_Splice, MVT::v64i8, {2, 2, 2, 2}}, // vshufi64x2 + palignr
};
if (ST->hasBWI())
@@ -1874,84 +1874,84 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
return LT.first * *KindCost;
static const CostKindTblEntry AVX512ShuffleTbl[] = {
- {TTI::SK_Broadcast, MVT::v8f64, { 1, 3, 1, 1 } }, // vbroadcastsd
- {TTI::SK_Broadcast, MVT::v4f64, { 1, 3, 1, 1 } }, // vbroadcastsd
- {TTI::SK_Broadcast, MVT::v16f32, { 1, 3, 1, 1 } }, // vbroadcastss
- {TTI::SK_Broadcast, MVT::v8f32, { 1, 3, 1, 1 } }, // vbroadcastss
- {TTI::SK_Broadcast, MVT::v8i64, { 1, 3, 1, 1 } }, // vpbroadcastq
- {TTI::SK_Broadcast, MVT::v4i64, { 1, 3, 1, 1 } }, // vpbroadcastq
- {TTI::SK_Broadcast, MVT::v16i32, { 1, 3, 1, 1 } }, // vpbroadcastd
- {TTI::SK_Broadcast, MVT::v8i32, { 1, 3, 1, 1 } }, // vpbroadcastd
- {TTI::SK_Broadcast, MVT::v32i16, { 1, 3, 1, 1 } }, // vpbroadcastw
- {TTI::SK_Broadcast, MVT::v16i16, { 1, 3, 1, 1 } }, // vpbroadcastw
- {TTI::SK_Broadcast, MVT::v32f16, { 1, 3, 1, 1 } }, // vpbroadcastw
- {TTI::SK_Broadcast, MVT::v16f16, { 1, 3, 1, 1 } }, // vpbroadcastw
- {TTI::SK_Broadcast, MVT::v64i8, { 1, 3, 1, 1 } }, // vpbroadcastb
- {TTI::SK_Broadcast, MVT::v32i8, { 1, 3, 1, 1 }}, // vpbroadcastb
-
- {TTI::SK_Reverse, MVT::v8f64, { 1, 3, 1, 1 } }, // vpermpd
- {TTI::SK_Reverse, MVT::v16f32, { 1, 3, 1, 1 } }, // vpermps
- {TTI::SK_Reverse, MVT::v8i64, { 1, 3, 1, 1 } }, // vpermq
- {TTI::SK_Reverse, MVT::v16i32, { 1, 3, 1, 1 } }, // vpermd
- {TTI::SK_Reverse, MVT::v32i16, { 7, 7, 7, 7 } }, // per mca
- {TTI::SK_Reverse, MVT::v32f16, { 7, 7, 7, 7 } }, // per mca
- {TTI::SK_Reverse, MVT::v64i8, { 7, 7, 7, 7 } }, // per mca
-
- {TTI::SK_Splice, MVT::v8f64, { 1, 1, 1, 1 } }, // vpalignd
- {TTI::SK_Splice, MVT::v4f64, { 1, 1, 1, 1 } }, // vpalignd
- {TTI::SK_Splice, MVT::v16f32, { 1, 1, 1, 1 } }, // vpalignd
- {TTI::SK_Splice, MVT::v8f32, { 1, 1, 1, 1 } }, // vpalignd
- {TTI::SK_Splice, MVT::v8i64, { 1, 1, 1, 1 } }, // vpalignd
- {TTI::SK_Splice, MVT::v4i64, { 1, 1, 1, 1 } }, // vpalignd
- {TTI::SK_Splice, MVT::v16i32, { 1, 1, 1, 1 } }, // vpalignd
- {TTI::SK_Splice, MVT::v8i32, { 1, 1, 1, 1 } }, // vpalignd
- {TTI::SK_Splice, MVT::v32i16, { 4, 4, 4, 4 } }, // split + palignr
- {TTI::SK_Splice, MVT::v32f16, { 4, 4, 4, 4 } }, // split + palignr
- {TTI::SK_Splice, MVT::v64i8, { 4, 4, 4, 4 } }, // split + palignr
-
- {TTI::SK_PermuteSingleSrc, MVT::v8f64, { 1, 3, 1, 1 } }, // vpermpd
- {TTI::SK_PermuteSingleSrc, MVT::v4f64, { 1, 3, 1, 1 } }, // vpermpd
- {TTI::SK_PermuteSingleSrc, MVT::v2f64, { 1, 3, 1, 1 } }, // vpermpd
- {TTI::SK_PermuteSingleSrc, MVT::v16f32, { 1, 3, 1, 1 } }, // vpermps
- {TTI::SK_PermuteSingleSrc, MVT::v8f32, { 1, 3, 1, 1 } }, // vpermps
- {TTI::SK_PermuteSingleSrc, MVT::v4f32, { 1, 3, 1, 1 } }, // vpermps
- {TTI::SK_PermuteSingleSrc, MVT::v8i64, { 1, 3, 1, 1 } }, // vpermq
- {TTI::SK_PermuteSingleSrc, MVT::v4i64, { 1, 3, 1, 1 } }, // vpermq
- {TTI::SK_PermuteSingleSrc, MVT::v2i64, { 1, 3, 1, 1 } }, // vpermq
- {TTI::SK_PermuteSingleSrc, MVT::v16i32, { 1, 3, 1, 1 } }, // vpermd
- {TTI::SK_PermuteSingleSrc, MVT::v8i32, { 1, 3, 1, 1 } }, // vpermd
- {TTI::SK_PermuteSingleSrc, MVT::v4i32, { 1, 3, 1, 1 } }, // vpermd
- {TTI::SK_PermuteSingleSrc, MVT::v16i8, { 1, 3, 1, 1 } }, // pshufb
-
- {TTI::SK_PermuteTwoSrc, MVT::v8f64, { 1, 3, 1, 1 } }, // vpermt2pd
- {TTI::SK_PermuteTwoSrc, MVT::v16f32, { 1, 3, 1, 1 } }, // vpermt2ps
- {TTI::SK_PermuteTwoSrc, MVT::v8i64, { 1, 3, 1, 1 } }, // vpermt2q
- {TTI::SK_PermuteTwoSrc, MVT::v16i32, { 1, 3, 1, 1 } }, // vpermt2d
- {TTI::SK_PermuteTwoSrc, MVT::v4f64, { 1, 3, 1, 1 } }, // vpermt2pd
- {TTI::SK_PermuteTwoSrc, MVT::v8f32, { 1, 3, 1, 1 } }, // vpermt2ps
- {TTI::SK_PermuteTwoSrc, MVT::v4i64, { 1, 3, 1, 1 } }, // vpermt2q
- {TTI::SK_PermuteTwoSrc, MVT::v8i32, { 1, 3, 1, 1 } }, // vpermt2d
- {TTI::SK_PermuteTwoSrc, MVT::v2f64, { 1, 3, 1, 1 } }, // vpermt2pd
- {TTI::SK_PermuteTwoSrc, MVT::v4f32, { 1, 3, 1, 1 } }, // vpermt2ps
- {TTI::SK_PermuteTwoSrc, MVT::v2i64, { 1, 3, 1, 1 } }, // vpermt2q
- {TTI::SK_PermuteTwoSrc, MVT::v4i32, { 1, 3, 1, 1 } }, // vpermt2d
+ {TTI::SK_Broadcast, MVT::v8f64, {1, 3, 1, 1}}, // vbroadcastsd
+ {TTI::SK_Broadcast, MVT::v4f64, {1, 3, 1, 1}}, // vbroadcastsd
+ {TTI::SK_Broadcast, MVT::v16f32, {1, 3, 1, 1}}, // vbroadcastss
+ {TTI::SK_Broadcast, MVT::v8f32, {1, 3, 1, 1}}, // vbroadcastss
+ {TTI::SK_Broadcast, MVT::v8i64, {1, 3, 1, 1}}, // vpbroadcastq
+ {TTI::SK_Broadcast, MVT::v4i64, {1, 3, 1, 1}}, // vpbroadcastq
+ {TTI::SK_Broadcast, MVT::v16i32, {1, 3, 1, 1}}, // vpbroadcastd
+ {TTI::SK_Broadcast, MVT::v8i32, {1, 3, 1, 1}}, // vpbroadcastd
+ {TTI::SK_Broadcast, MVT::v32i16, {1, 3, 1, 1}}, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v16i16, {1, 3, 1, 1}}, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v32f16, {1, 3, 1, 1}}, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v16f16, {1, 3, 1, 1}}, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v64i8, {1, 3, 1, 1}}, // vpbroadcastb
+ {TTI::SK_Broadcast, MVT::v32i8, {1, 3, 1, 1}}, // vpbroadcastb
+
+ {TTI::SK_Reverse, MVT::v8f64, {1, 3, 1, 1}}, // vpermpd
+ {TTI::SK_Reverse, MVT::v16f32, {1, 3, 1, 1}}, // vpermps
+ {TTI::SK_Reverse, MVT::v8i64, {1, 3, 1, 1}}, // vpermq
+ {TTI::SK_Reverse, MVT::v16i32, {1, 3, 1, 1}}, // vpermd
+ {TTI::SK_Reverse, MVT::v32i16, {7, 7, 7, 7}}, // per mca
+ {TTI::SK_Reverse, MVT::v32f16, {7, 7, 7, 7}}, // per mca
+ {TTI::SK_Reverse, MVT::v64i8, {7, 7, 7, 7}}, // per mca
+
+ {TTI::SK_Splice, MVT::v8f64, {1, 1, 1, 1}}, // vpalignd
+ {TTI::SK_Splice, MVT::v4f64, {1, 1, 1, 1}}, // vpalignd
+ {TTI::SK_Splice, MVT::v16f32, {1, 1, 1, 1}}, // vpalignd
+ {TTI::SK_Splice, MVT::v8f32, {1, 1, 1, 1}}, // vpalignd
+ {TTI::SK_Splice, MVT::v8i64, {1, 1, 1, 1}}, // vpalignd
+ {TTI::SK_Splice, MVT::v4i64, {1, 1, 1, 1}}, // vpalignd
+ {TTI::SK_Splice, MVT::v16i32, {1, 1, 1, 1}}, // vpalignd
+ {TTI::SK_Splice, MVT::v8i32, {1, 1, 1, 1}}, // vpalignd
+ {TTI::SK_Splice, MVT::v32i16, {4, 4, 4, 4}}, // split + palignr
+ {TTI::SK_Splice, MVT::v32f16, {4, 4, 4, 4}}, // split + palignr
+ {TTI::SK_Splice, MVT::v64i8, {4, 4, 4, 4}}, // split + palignr
+
+ {TTI::SK_PermuteSingleSrc, MVT::v8f64, {1, 3, 1, 1}}, // vpermpd
+ {TTI::SK_PermuteSingleSrc, MVT::v4f64, {1, 3, 1, 1}}, // vpermpd
+ {TTI::SK_PermuteSingleSrc, MVT::v2f64, {1, 3, 1, 1}}, // vpermpd
+ {TTI::SK_PermuteSingleSrc, MVT::v16f32, {1, 3, 1, 1}}, // vpermps
+ {TTI::SK_PermuteSingleSrc, MVT::v8f32, {1, 3, 1, 1}}, // vpermps
+ {TTI::SK_PermuteSingleSrc, MVT::v4f32, {1, 3, 1, 1}}, // vpermps
+ {TTI::SK_PermuteSingleSrc, MVT::v8i64, {1, 3, 1, 1}}, // vpermq
+ {TTI::SK_PermuteSingleSrc, MVT::v4i64, {1, 3, 1, 1}}, // vpermq
+ {TTI::SK_PermuteSingleSrc, MVT::v2i64, {1, 3, 1, 1}}, // vpermq
+ {TTI::SK_PermuteSingleSrc, MVT::v16i32, {1, 3, 1, 1}}, // vpermd
+ {TTI::SK_PermuteSingleSrc, MVT::v8i32, {1, 3, 1, 1}}, // vpermd
+ {TTI::SK_PermuteSingleSrc, MVT::v4i32, {1, 3, 1, 1}}, // vpermd
+ {TTI::SK_PermuteSingleSrc, MVT::v16i8, {1, 3, 1, 1}}, // pshufb
+
+ {TTI::SK_PermuteTwoSrc, MVT::v8f64, {1, 3, 1, 1}}, // vpermt2pd
+ {TTI::SK_PermuteTwoSrc, MVT::v16f32, {1, 3, 1, 1}}, // vpermt2ps
+ {TTI::SK_PermuteTwoSrc, MVT::v8i64, {1, 3, 1, 1}}, // vpermt2q
+ {TTI::SK_PermuteTwoSrc, MVT::v16i32, {1, 3, 1, 1}}, // vpermt2d
+ {TTI::SK_PermuteTwoSrc, MVT::v4f64, {1, 3, 1, 1}}, // vpermt2pd
+ {TTI::SK_PermuteTwoSrc, MVT::v8f32, {1, 3, 1, 1}}, // vpermt2ps
+ {TTI::SK_PermuteTwoSrc, MVT::v4i64, {1, 3, 1, 1}}, // vpermt2q
+ {TTI::SK_PermuteTwoSrc, MVT::v8i32, {1, 3, 1, 1}}, // vpermt2d
+ {TTI::SK_PermuteTwoSrc, MVT::v2f64, {1, 3, 1, 1}}, // vpermt2pd
+ {TTI::SK_PermuteTwoSrc, MVT::v4f32, {1, 3, 1, 1}}, // vpermt2ps
+ {TTI::SK_PermuteTwoSrc, MVT::v2i64, {1, 3, 1, 1}}, // vpermt2q
+ {TTI::SK_PermuteTwoSrc, MVT::v4i32, {1, 3, 1, 1}}, // vpermt2d
// FIXME: This just applies the type legalization cost rules above
// assuming these completely split.
- {TTI::SK_PermuteSingleSrc, MVT::v32i16, { 14, 14, 14, 14 } },
- {TTI::SK_PermuteSingleSrc, MVT::v32f16, { 14, 14, 14, 14 } },
- {TTI::SK_PermuteSingleSrc, MVT::v64i8, { 14, 14, 14, 14 } },
- {TTI::SK_PermuteTwoSrc, MVT::v32i16, { 42, 42, 42, 42 } },
- {TTI::SK_PermuteTwoSrc, MVT::v32f16, { 42, 42, 42, 42 } },
- {TTI::SK_PermuteTwoSrc, MVT::v64i8, { 42, 42, 42, 42 } },
-
- {TTI::SK_Select, MVT::v32i16, { 1, 1, 1, 1 } }, // vpternlogq
- {TTI::SK_Select, MVT::v32f16, { 1, 1, 1, 1 } }, // vpternlogq
- {TTI::SK_Select, MVT::v64i8, { 1, 1, 1, 1 } }, // vpternlogq
- {TTI::SK_Select, MVT::v8f64, { 1, 1, 1, 1 } }, // vblendmpd
- {TTI::SK_Select, MVT::v16f32, { 1, 1, 1, 1 } }, // vblendmps
- {TTI::SK_Select, MVT::v8i64, { 1, 1, 1, 1 } }, // vblendmq
- {TTI::SK_Select, MVT::v16i32, { 1, 1, 1, 1 } }, // vblendmd
+ {TTI::SK_PermuteSingleSrc, MVT::v32i16, {14, 14, 14, 14}},
+ {TTI::SK_PermuteSingleSrc, MVT::v32f16, {14, 14, 14, 14}},
+ {TTI::SK_PermuteSingleSrc, MVT::v64i8, {14, 14, 14, 14}},
+ {TTI::SK_PermuteTwoSrc, MVT::v32i16, {42, 42, 42, 42}},
+ {TTI::SK_PermuteTwoSrc, MVT::v32f16, {42, 42, 42, 42}},
+ {TTI::SK_PermuteTwoSrc, MVT::v64i8, {42, 42, 42, 42}},
+
+ {TTI::SK_Select, MVT::v32i16, {1, 1, 1, 1}}, // vpternlogq
+ {TTI::SK_Select, MVT::v32f16, {1, 1, 1, 1}}, // vpternlogq
+ {TTI::SK_Select, MVT::v64i8, {1, 1, 1, 1}}, // vpternlogq
+ {TTI::SK_Select, MVT::v8f64, {1, 1, 1, 1}}, // vblendmpd
+ {TTI::SK_Select, MVT::v16f32, {1, 1, 1, 1}}, // vblendmps
+ {TTI::SK_Select, MVT::v8i64, {1, 1, 1, 1}}, // vblendmq
+ {TTI::SK_Select, MVT::v16i32, {1, 1, 1, 1}}, // vblendmd
};
if (ST->hasAVX512())
@@ -1980,50 +1980,50 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
return LT.first * *KindCost;
static const CostKindTblEntry AVX2ShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v4f64, { 1, 3, 1, 2 } }, // vbroadcastpd
- { TTI::SK_Broadcast, MVT::v8f32, { 1, 3, 1, 2 } }, // vbroadcastps
- { TTI::SK_Broadcast, MVT::v4i64, { 1, 3, 1, 2 } }, // vpbroadcastq
- { TTI::SK_Broadcast, MVT::v8i32, { 1, 3, 1, 2 } }, // vpbroadcastd
- { TTI::SK_Broadcast, MVT::v16i16, { 1, 3, 1, 2 } }, // vpbroadcastw
- { TTI::SK_Broadcast, MVT::v8i16, { 1, 3, 1, 1 } }, // vpbroadcastw
- { TTI::SK_Broadcast, MVT::v16f16, { 1, 3, 1, 2 } }, // vpbroadcastw
- { TTI::SK_Broadcast, MVT::v8f16, { 1, 3, 1, 1 } }, // vpbroadcastw
- { TTI::SK_Broadcast, MVT::v32i8, { 1, 3, 1, 2 } }, // vpbroadcastb
- { TTI::SK_Broadcast, MVT::v16i8, { 1, 3, 1, 1 } }, // vpbroadcastb
-
- { TTI::SK_Reverse, MVT::v4f64, { 1, 1, 1, 1 } }, // vpermpd
- { TTI::SK_Reverse, MVT::v8f32, { 1, 1, 1, 1 } }, // vpermps
- { TTI::SK_Reverse, MVT::v4i64, { 1, 1, 1, 1 } }, // vpermq
- { TTI::SK_Reverse, MVT::v8i32, { 1, 1, 1, 1 } }, // vpermd
- { TTI::SK_Reverse, MVT::v16i16, { 2, 2, 2, 2 } }, // vperm2i128 + pshufb
- { TTI::SK_Reverse, MVT::v16f16, { 2, 2, 2, 2 } }, // vperm2i128 + pshufb
- { TTI::SK_Reverse, MVT::v32i8, { 2, 2, 2, 2 } }, // vperm2i128 + pshufb
-
- { TTI::SK_Select, MVT::v16i16, { 1, 1, 1, 1 } }, // vpblendvb
- { TTI::SK_Select, MVT::v16f16, { 1, 1, 1, 1 } }, // vpblendvb
- { TTI::SK_Select, MVT::v32i8, { 1, 1, 1, 1 } }, // vpblendvb
-
- { TTI::SK_Splice, MVT::v8i32, { 2, 2, 2, 2 } }, // vperm2i128 + vpalignr
- { TTI::SK_Splice, MVT::v8f32, { 2, 2, 2, 2 } }, // vperm2i128 + vpalignr
- { TTI::SK_Splice, MVT::v16i16, { 2, 2, 2, 2 } }, // vperm2i128 + vpalignr
- { TTI::SK_Splice, MVT::v16f16, { 2, 2, 2, 2 } }, // vperm2i128 + vpalignr
- { TTI::SK_Splice, MVT::v32i8, { 2, 2, 2, 2 } }, // vperm2i128 + vpalignr
-
- { TTI::SK_PermuteSingleSrc, MVT::v4f64, { 1, 1, 1, 1 } }, // vpermpd
- { TTI::SK_PermuteSingleSrc, MVT::v8f32, { 1, 1, 1, 1 } }, // vpermps
- { TTI::SK_PermuteSingleSrc, MVT::v4i64, { 1, 1, 1, 1 } }, // vpermq
- { TTI::SK_PermuteSingleSrc, MVT::v8i32, { 1, 1, 1, 1 } }, // vpermd
- { TTI::SK_PermuteSingleSrc, MVT::v16i16, { 4, 4, 4, 4 } },
- { TTI::SK_PermuteSingleSrc, MVT::v16f16, { 4, 4, 4, 4 } },
- { TTI::SK_PermuteSingleSrc, MVT::v32i8, { 4, 4, 4, 4 } },
-
- { TTI::SK_PermuteTwoSrc, MVT::v4f64, { 3, 3, 3, 3 } }, // 2*vpermpd + vblendpd
- { TTI::SK_PermuteTwoSrc, MVT::v8f32, { 3, 3, 3, 3 } }, // 2*vpermps + vblendps
- { TTI::SK_PermuteTwoSrc, MVT::v4i64, { 3, 3, 3, 3 } }, // 2*vpermq + vpblendd
- { TTI::SK_PermuteTwoSrc, MVT::v8i32, { 3, 3, 3, 3 } }, // 2*vpermd + vpblendd
- { TTI::SK_PermuteTwoSrc, MVT::v16i16, { 7, 7, 7, 7 } },
- { TTI::SK_PermuteTwoSrc, MVT::v16f16, { 7, 7, 7, 7 } },
- { TTI::SK_PermuteTwoSrc, MVT::v32i8, { 7, 7, 7, 7 } },
+ {TTI::SK_Broadcast, MVT::v4f64, {1, 3, 1, 2}}, // vbroadcastpd
+ {TTI::SK_Broadcast, MVT::v8f32, {1, 3, 1, 2}}, // vbroadcastps
+ {TTI::SK_Broadcast, MVT::v4i64, {1, 3, 1, 2}}, // vpbroadcastq
+ {TTI::SK_Broadcast, MVT::v8i32, {1, 3, 1, 2}}, // vpbroadcastd
+ {TTI::SK_Broadcast, MVT::v16i16, {1, 3, 1, 2}}, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v8i16, {1, 3, 1, 1}}, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v16f16, {1, 3, 1, 2}}, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v8f16, {1, 3, 1, 1}}, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v32i8, {1, 3, 1, 2}}, // vpbroadcastb
+ {TTI::SK_Broadcast, MVT::v16i8, {1, 3, 1, 1}}, // vpbroadcastb
+
+ {TTI::SK_Reverse, MVT::v4f64, {1, 1, 1, 1}}, // vpermpd
+ {TTI::SK_Reverse, MVT::v8f32, {1, 1, 1, 1}}, // vpermps
+ {TTI::SK_Reverse, MVT::v4i64, {1, 1, 1, 1}}, // vpermq
+ {TTI::SK_Reverse, MVT::v8i32, {1, 1, 1, 1}}, // vpermd
+ {TTI::SK_Reverse, MVT::v16i16, {2, 2, 2, 2}}, // vperm2i128 + pshufb
+ {TTI::SK_Reverse, MVT::v16f16, {2, 2, 2, 2}}, // vperm2i128 + pshufb
+ {TTI::SK_Reverse, MVT::v32i8, {2, 2, 2, 2}}, // vperm2i128 + pshufb
+
+ {TTI::SK_Select, MVT::v16i16, {1, 1, 1, 1}}, // vpblendvb
+ {TTI::SK_Select, MVT::v16f16, {1, 1, 1, 1}}, // vpblendvb
+ {TTI::SK_Select, MVT::v32i8, {1, 1, 1, 1}}, // vpblendvb
+
+ {TTI::SK_Splice, MVT::v8i32, {2, 2, 2, 2}}, // vperm2i128 + vpalignr
+ {TTI::SK_Splice, MVT::v8f32, {2, 2, 2, 2}}, // vperm2i128 + vpalignr
+ {TTI::SK_Splice, MVT::v16i16, {2, 2, 2, 2}}, // vperm2i128 + vpalignr
+ {TTI::SK_Splice, MVT::v16f16, {2, 2, 2, 2}}, // vperm2i128 + vpalignr
+ {TTI::SK_Splice, MVT::v32i8, {2, 2, 2, 2}}, // vperm2i128 + vpalignr
+
+ {TTI::SK_PermuteSingleSrc, MVT::v4f64, {1, 1, 1, 1}}, // vpermpd
+ {TTI::SK_PermuteSingleSrc, MVT::v8f32, {1, 1, 1, 1}}, // vpermps
+ {TTI::SK_PermuteSingleSrc, MVT::v4i64, {1, 1, 1, 1}}, // vpermq
+ {TTI::SK_PermuteSingleSrc, MVT::v8i32, {1, 1, 1, 1}}, // vpermd
+ {TTI::SK_PermuteSingleSrc, MVT::v16i16, {4, 4, 4, 4}},
+ {TTI::SK_PermuteSingleSrc, MVT::v16f16, {4, 4, 4, 4}},
+ {TTI::SK_PermuteSingleSrc, MVT::v32i8, {4, 4, 4, 4}},
+
+ {TTI::SK_PermuteTwoSrc, MVT::v4f64, {3, 3, 3, 3}}, // 2*vpermpd + vblendpd
+ {TTI::SK_PermuteTwoSrc, MVT::v8f32, {3, 3, 3, 3}}, // 2*vpermps + vblendps
+ {TTI::SK_PermuteTwoSrc, MVT::v4i64, {3, 3, 3, 3}}, // 2*vpermq + vpblendd
+ {TTI::SK_PermuteTwoSrc, MVT::v8i32, {3, 3, 3, 3}}, // 2*vpermd + vpblendd
+ {TTI::SK_PermuteTwoSrc, MVT::v16i16, {7, 7, 7, 7}},
+ {TTI::SK_PermuteTwoSrc, MVT::v16f16, {7, 7, 7, 7}},
+ {TTI::SK_PermuteTwoSrc, MVT::v32i8, {7, 7, 7, 7}},
};
if (ST->hasAVX2())
@@ -2087,62 +2087,100 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
return LT.first * *KindCost;
static const CostKindTblEntry AVX1ShuffleTbl[] = {
- {TTI::SK_Broadcast, MVT::v4f64, {2,3,2,3}}, // vperm2f128 + vpermilpd
- {TTI::SK_Broadcast, MVT::v8f32, {2,3,2,3}}, // vperm2f128 + vpermilps
- {TTI::SK_Broadcast, MVT::v4i64, {2,3,2,3}}, // vperm2f128 + vpermilpd
- {TTI::SK_Broadcast, MVT::v8i32, {2,3,2,3}}, // vperm2f128 + vpermilps
- {TTI::SK_Broadcast, MVT::v16i16, {2,3,3,4}}, // vpshuflw + vpshufd + vinsertf128
- {TTI::SK_Broadcast, MVT::v16f16, {2,3,3,4}}, // vpshuflw + vpshufd + vinsertf128
- {TTI::SK_Broadcast, MVT::v32i8, {3,4,3,6}}, // vpshufb + vinsertf128
-
- {TTI::SK_Reverse, MVT::v4f64, {2,2,2,2}}, // vperm2f128 + vpermilpd
- {TTI::SK_Reverse, MVT::v8f32, {2,2,2,2}}, // vperm2f128 + vpermilps
- {TTI::SK_Reverse, MVT::v4i64, {2,2,2,2}}, // vperm2f128 + vpermilpd
- {TTI::SK_Reverse, MVT::v8i32, {2,2,2,2}}, // vperm2f128 + vpermilps
- {TTI::SK_Reverse, MVT::v16i16, {4,4,4,4}}, // vextractf128 + 2*pshufb
- // + vinsertf128
- {TTI::SK_Reverse, MVT::v16f16, {4,4,4,4}}, // vextractf128 + 2*pshufb
- // + vinsertf128
- {TTI::SK_Reverse, MVT::v32i8, {4,4,4,4}}, // vextractf128 + 2*pshufb
- // + vinsertf128
-
- {TTI::SK_Select, MVT::v4i64, {1,1,1,1}}, // vblendpd
- {TTI::SK_Select, MVT::v4f64, {1,1,1,1}}, // vblendpd
- {TTI::SK_Select, MVT::v8i32, {1,1,1,1}}, // vblendps
- {TTI::SK_Select, MVT::v8f32, {1,1,1,1}}, // vblendps
- {TTI::SK_Select, MVT::v16i16, {3,3,3,3}}, // vpand + vpandn + vpor
- {TTI::SK_Select, MVT::v16f16, {3,3,3,3}}, // vpand + vpandn + vpor
- {TTI::SK_Select, MVT::v32i8, {3,3,3,3}}, // vpand + vpandn + vpor
-
- {TTI::SK_Splice, MVT::v4i64, {2,2,2,2}}, // vperm2f128 + shufpd
- {TTI::SK_Splice, MVT::v4f64, {2,2,2,2}}, // vperm2f128 + shufpd
- {TTI::SK_Splice, MVT::v8i32, {4,4,4,4}}, // 2*vperm2f128 + 2*vshufps
- {TTI::SK_Splice, MVT::v8f32, {4,4,4,4}}, // 2*vperm2f128 + 2*vshufps
- {TTI::SK_Splice, MVT::v16i16, {5,5,5,5}}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
- {TTI::SK_Splice, MVT::v16f16, {5,5,5,5}}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
- {TTI::SK_Splice, MVT::v32i8, {5,5,5,5}}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
-
- {TTI::SK_PermuteSingleSrc, MVT::v4f64, {2,2,2,2}}, // vperm2f128 + vshufpd
- {TTI::SK_PermuteSingleSrc, MVT::v4i64, {2,2,2,2}}, // vperm2f128 + vshufpd
- {TTI::SK_PermuteSingleSrc, MVT::v8f32, {4,4,4,4}}, // 2*vperm2f128 + 2*vshufps
- {TTI::SK_PermuteSingleSrc, MVT::v8i32, {4,4,4,4}}, // 2*vperm2f128 + 2*vshufps
- {TTI::SK_PermuteSingleSrc, MVT::v16i16,{8,8,8,8}}, // vextractf128 + 4*pshufb
- // + 2*por + vinsertf128
- {TTI::SK_PermuteSingleSrc, MVT::v16f16,{8,8,8,8}}, // vextractf128 + 4*pshufb
- // + 2*por + vinsertf128
- {TTI::SK_PermuteSingleSrc, MVT::v32i8, {8,8,8,8}}, // vextractf128 + 4*pshufb
- // + 2*por + vinsertf128
-
- {TTI::SK_PermuteTwoSrc, MVT::v4f64, {3,3,3,3}}, // 2*vperm2f128 + vshufpd
- {TTI::SK_PermuteTwoSrc, MVT::v4i64, {3,3,3,3}}, // 2*vperm2f128 + vshufpd
- {TTI::SK_PermuteTwoSrc, MVT::v8f32, {4,4,4,4}}, // 2*vperm2f128 + 2*vshufps
- {TTI::SK_PermuteTwoSrc, MVT::v8i32, {4,4,4,4}}, // 2*vperm2f128 + 2*vshufps
- {TTI::SK_PermuteTwoSrc, MVT::v16i16,{15,15,15,15}}, // 2*vextractf128 + 8*pshufb
- // + 4*por + vinsertf128
- {TTI::SK_PermuteTwoSrc, MVT::v16f16,{15,15,15,15}}, // 2*vextractf128 + 8*pshufb
- // + 4*por + vinsertf128
- {TTI::SK_PermuteTwoSrc, MVT::v32i8, {15,15,15,15}}, // 2*vextractf128 + 8*pshufb
- // + 4*por + vinsertf128
+ {TTI::SK_Broadcast, MVT::v4f64, {2, 3, 2, 3}}, // vperm2f128 + vpermilpd
+ {TTI::SK_Broadcast, MVT::v8f32, {2, 3, 2, 3}}, // vperm2f128 + vpermilps
+ {TTI::SK_Broadcast, MVT::v4i64, {2, 3, 2, 3}}, // vperm2f128 + vpermilpd
+ {TTI::SK_Broadcast, MVT::v8i32, {2, 3, 2, 3}}, // vperm2f128 + vpermilps
+ {TTI::SK_Broadcast,
+ MVT::v16i16,
+ {2, 3, 3, 4}}, // vpshuflw + vpshufd + vinsertf128
+ {TTI::SK_Broadcast,
+ MVT::v16f16,
+ {2, 3, 3, 4}}, // vpshuflw + vpshufd + vinsertf128
+ {TTI::SK_Broadcast, MVT::v32i8, {3, 4, 3, 6}}, // vpshufb + vinsertf128
+
+ {TTI::SK_Reverse, MVT::v4f64, {2, 2, 2, 2}}, // vperm2f128 + vpermilpd
+ {TTI::SK_Reverse, MVT::v8f32, {2, 2, 2, 2}}, // vperm2f128 + vpermilps
+ {TTI::SK_Reverse, MVT::v4i64, {2, 2, 2, 2}}, // vperm2f128 + vpermilpd
+ {TTI::SK_Reverse, MVT::v8i32, {2, 2, 2, 2}}, // vperm2f128 + vpermilps
+ {TTI::SK_Reverse, MVT::v16i16, {4, 4, 4, 4}}, // vextractf128 + 2*pshufb
+ // + vinsertf128
+ {TTI::SK_Reverse, MVT::v16f16, {4, 4, 4, 4}}, // vextractf128 + 2*pshufb
+ // + vinsertf128
+ {TTI::SK_Reverse, MVT::v32i8, {4, 4, 4, 4}}, // vextractf128 + 2*pshufb
+ // + vinsertf128
+
+ {TTI::SK_Select, MVT::v4i64, {1, 1, 1, 1}}, // vblendpd
+ {TTI::SK_Select, MVT::v4f64, {1, 1, 1, 1}}, // vblendpd
+ {TTI::SK_Select, MVT::v8i32, {1, 1, 1, 1}}, // vblendps
+ {TTI::SK_Select, MVT::v8f32, {1, 1, 1, 1}}, // vblendps
+ {TTI::SK_Select, MVT::v16i16, {3, 3, 3, 3}}, // vpand + vpandn + vpor
+ {TTI::SK_Select, MVT::v16f16, {3, 3, 3, 3}}, // vpand + vpandn + vpor
+ {TTI::SK_Select, MVT::v32i8, {3, 3, 3, 3}}, // vpand + vpandn + vpor
+
+ {TTI::SK_Splice, MVT::v4i64, {2, 2, 2, 2}}, // vperm2f128 + shufpd
+ {TTI::SK_Splice, MVT::v4f64, {2, 2, 2, 2}}, // vperm2f128 + shufpd
+ {TTI::SK_Splice, MVT::v8i32, {4, 4, 4, 4}}, // 2*vperm2f128 + 2*vshufps
+ {TTI::SK_Splice, MVT::v8f32, {4, 4, 4, 4}}, // 2*vperm2f128 + 2*vshufps
+ {TTI::SK_Splice,
+ MVT::v16i16,
+ {5, 5, 5, 5}}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
+ {TTI::SK_Splice,
+ MVT::v16f16,
+ {5, 5, 5, 5}}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
+ {TTI::SK_Splice,
+ MVT::v32i8,
+ {5, 5, 5, 5}}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
+
+ {TTI::SK_PermuteSingleSrc,
+ MVT::v4f64,
+ {2, 2, 2, 2}}, // vperm2f128 + vshufpd
+ {TTI::SK_PermuteSingleSrc,
+ MVT::v4i64,
+ {2, 2, 2, 2}}, // vperm2f128 + vshufpd
+ {TTI::SK_PermuteSingleSrc,
+ MVT::v8f32,
+ {4, 4, 4, 4}}, // 2*vperm2f128 + 2*vshufps
+ {TTI::SK_PermuteSingleSrc,
+ MVT::v8i32,
+ {4, 4, 4, 4}}, // 2*vperm2f128 + 2*vshufps
+ {TTI::SK_PermuteSingleSrc,
+ MVT::v16i16,
+ {8, 8, 8, 8}}, // vextractf128 + 4*pshufb
+ // + 2*por + vinsertf128
+ {TTI::SK_PermuteSingleSrc,
+ MVT::v16f16,
+ {8, 8, 8, 8}}, // vextractf128 + 4*pshufb
+ // + 2*por + vinsertf128
+ {TTI::SK_PermuteSingleSrc,
+ MVT::v32i8,
+ {8, 8, 8, 8}}, // vextractf128 + 4*pshufb
+ // + 2*por + vinsertf128
+
+ {TTI::SK_PermuteTwoSrc,
+ MVT::v4f64,
+ {3, 3, 3, 3}}, // 2*vperm2f128 + vshufpd
+ {TTI::SK_PermuteTwoSrc,
+ MVT::v4i64,
+ {3, 3, 3, 3}}, // 2*vperm2f128 + vshufpd
+ {TTI::SK_PermuteTwoSrc,
+ MVT::v8f32,
+ {4, 4, 4, 4}}, // 2*vperm2f128 + 2*vshufps
+ {TTI::SK_PermuteTwoSrc,
+ MVT::v8i32,
+ {4, 4, 4, 4}}, // 2*vperm2f128 + 2*vshufps
+ {TTI::SK_PermuteTwoSrc,
+ MVT::v16i16,
+ {15, 15, 15, 15}}, // 2*vextractf128 + 8*pshufb
+ // + 4*por + vinsertf128
+ {TTI::SK_PermuteTwoSrc,
+ MVT::v16f16,
+ {15, 15, 15, 15}}, // 2*vextractf128 + 8*pshufb
+ // + 4*por + vinsertf128
+ {TTI::SK_PermuteTwoSrc,
+ MVT::v32i8,
+ {15, 15, 15, 15}}, // 2*vextractf128 + 8*pshufb
+ // + 4*por + vinsertf128
};
if (ST->hasAVX())
@@ -2211,8 +2249,10 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{TTI::SK_Reverse, MVT::v4i32, {1, 1, 1, 1}}, // pshufd
{TTI::SK_Reverse, MVT::v8i16, {3, 3, 3, 3}}, // pshuflw + pshufhw + pshufd
{TTI::SK_Reverse, MVT::v8f16, {3, 3, 3, 3}}, // pshuflw + pshufhw + pshufd
- {TTI::SK_Reverse, MVT::v16i8, {9, 9, 9, 9}}, // 2*pshuflw + 2*pshufhw
- // + 2*pshufd + 2*unpck + packus
+ {TTI::SK_Reverse,
+ MVT::v16i8,
+ {9, 9, 9, 9}}, // 2*pshuflw + 2*pshufhw
+ // + 2*pshufd + 2*unpck + packus
{TTI::SK_Select, MVT::v2i64, {1, 1, 1, 1}}, // movsd
{TTI::SK_Select, MVT::v2f64, {1, 1, 1, 1}}, // movsd
@@ -2231,16 +2271,24 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{TTI::SK_PermuteSingleSrc, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
{TTI::SK_PermuteSingleSrc, MVT::v2i64, {1, 1, 1, 1}}, // pshufd
{TTI::SK_PermuteSingleSrc, MVT::v4i32, {1, 1, 1, 1}}, // pshufd
- {TTI::SK_PermuteSingleSrc, MVT::v8i16, {3, 5, 5, 5}}, // 2*pshuflw + 2*pshufhw
- // + pshufd/unpck
- {TTI::SK_PermuteSingleSrc, MVT::v8f16, {3, 5, 5, 5}}, // 2*pshuflw + 2*pshufhw
- // + pshufd/unpck
- {TTI::SK_PermuteSingleSrc, MVT::v16i8, {8, 10, 10, 10}}, // 2*pshuflw + 2*pshufhw
- // + 2*pshufd + 2*unpck + 2*packus
-
- {TTI::SK_PermuteTwoSrc, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
- {TTI::SK_PermuteTwoSrc, MVT::v2i64, {1, 1, 1, 1}}, // shufpd
- {TTI::SK_PermuteTwoSrc, MVT::v4i32, {2, 2, 2, 2}}, // 2*{unpck,movsd,pshufd}
+ {TTI::SK_PermuteSingleSrc,
+ MVT::v8i16,
+ {3, 5, 5, 5}}, // 2*pshuflw + 2*pshufhw
+ // + pshufd/unpck
+ {TTI::SK_PermuteSingleSrc,
+ MVT::v8f16,
+ {3, 5, 5, 5}}, // 2*pshuflw + 2*pshufhw
+ // + pshufd/unpck
+ {TTI::SK_PermuteSingleSrc,
+ MVT::v16i8,
+ {8, 10, 10, 10}}, // 2*pshuflw + 2*pshufhw
+ // + 2*pshufd + 2*unpck + 2*packus
+
+ {TTI::SK_PermuteTwoSrc, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
+ {TTI::SK_PermuteTwoSrc, MVT::v2i64, {1, 1, 1, 1}}, // shufpd
+ {TTI::SK_PermuteTwoSrc,
+ MVT::v4i32,
+ {2, 2, 2, 2}}, // 2*{unpck,movsd,pshufd}
{TTI::SK_PermuteTwoSrc, MVT::v8i16, {6, 8, 8, 8}}, // blend+permute
{TTI::SK_PermuteTwoSrc, MVT::v8f16, {6, 8, 8, 8}}, // blend+permute
{TTI::SK_PermuteTwoSrc, MVT::v16i8, {11, 13, 13, 13}}, // blend+permute
``````````
</details>
https://github.com/llvm/llvm-project/pull/150620
More information about the llvm-commits mailing list