[llvm] 7d172f9 - [CostModel][X86] getShuffleCosts - convert all shuffle cost tables to be CostKind compatible. NFC. (#124753)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 28 07:04:30 PST 2025
Author: Simon Pilgrim
Date: 2025-01-28T15:04:25Z
New Revision: 7d172f96ff2c4c7cf5c428b79a3c18e067ce0079
URL: https://github.com/llvm/llvm-project/commit/7d172f96ff2c4c7cf5c428b79a3c18e067ce0079
DIFF: https://github.com/llvm/llvm-project/commit/7d172f96ff2c4c7cf5c428b79a3c18e067ce0079.diff
LOG: [CostModel][X86] getShuffleCosts - convert all shuffle cost tables to be CostKind compatible. NFC. (#124753)
No change in actual costs yet; this patch only splits each table entry into separate per-cost-kind values, to make it easier to tweak the numbers in future patches.
Added:
Modified:
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 22dccaf061e1fd..82523bb6557adc 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1673,40 +1673,41 @@ InstructionCost X86TTIImpl::getShuffleCost(
EVT VT = TLI->getValueType(DL, BaseTp);
if (VT.isSimple() && VT.isVector() && VT.getSizeInBits() < 128 &&
!ST->hasSSSE3()) {
- static const CostTblEntry SSE2SubVectorShuffleTbl[] = {
- {TTI::SK_Broadcast, MVT::v4i16, 1}, // pshuflw
- {TTI::SK_Broadcast, MVT::v2i16, 1}, // pshuflw
- {TTI::SK_Broadcast, MVT::v8i8, 2}, // punpck/pshuflw
- {TTI::SK_Broadcast, MVT::v4i8, 2}, // punpck/pshuflw
- {TTI::SK_Broadcast, MVT::v2i8, 1}, // punpck
-
- {TTI::SK_Reverse, MVT::v4i16, 1}, // pshuflw
- {TTI::SK_Reverse, MVT::v2i16, 1}, // pshuflw
- {TTI::SK_Reverse, MVT::v4i8, 3}, // punpck/pshuflw/packus
- {TTI::SK_Reverse, MVT::v2i8, 1}, // punpck
-
- {TTI::SK_Splice, MVT::v4i16, 2}, // punpck+psrldq
- {TTI::SK_Splice, MVT::v2i16, 2}, // punpck+psrldq
- {TTI::SK_Splice, MVT::v4i8, 2}, // punpck+psrldq
- {TTI::SK_Splice, MVT::v2i8, 2}, // punpck+psrldq
-
- {TTI::SK_PermuteTwoSrc, MVT::v4i16, 2}, // punpck/pshuflw
- {TTI::SK_PermuteTwoSrc, MVT::v2i16, 2}, // punpck/pshuflw
- {TTI::SK_PermuteTwoSrc, MVT::v8i8, 7}, // punpck/pshuflw
- {TTI::SK_PermuteTwoSrc, MVT::v4i8, 4}, // punpck/pshuflw
- {TTI::SK_PermuteTwoSrc, MVT::v2i8, 2}, // punpck
-
- {TTI::SK_PermuteSingleSrc, MVT::v4i16, 1}, // pshuflw
- {TTI::SK_PermuteSingleSrc, MVT::v2i16, 1}, // pshuflw
- {TTI::SK_PermuteSingleSrc, MVT::v8i8, 5}, // punpck/pshuflw
- {TTI::SK_PermuteSingleSrc, MVT::v4i8, 3}, // punpck/pshuflw
- {TTI::SK_PermuteSingleSrc, MVT::v2i8, 1}, // punpck
+ static const CostKindTblEntry SSE2SubVectorShuffleTbl[] = {
+ {TTI::SK_Broadcast, MVT::v4i16, {1,1,1,1}}, // pshuflw
+ {TTI::SK_Broadcast, MVT::v2i16, {1,1,1,1}}, // pshuflw
+ {TTI::SK_Broadcast, MVT::v8i8, {2,2,2,2}}, // punpck/pshuflw
+ {TTI::SK_Broadcast, MVT::v4i8, {2,2,2,2}}, // punpck/pshuflw
+ {TTI::SK_Broadcast, MVT::v2i8, {1,1,1,1}}, // punpck
+
+ {TTI::SK_Reverse, MVT::v4i16, {1,1,1,1}}, // pshuflw
+ {TTI::SK_Reverse, MVT::v2i16, {1,1,1,1}}, // pshuflw
+ {TTI::SK_Reverse, MVT::v4i8, {3,3,3,3}}, // punpck/pshuflw/packus
+ {TTI::SK_Reverse, MVT::v2i8, {1,1,1,1}}, // punpck
+
+ {TTI::SK_Splice, MVT::v4i16, {2,2,2,2}}, // punpck+psrldq
+ {TTI::SK_Splice, MVT::v2i16, {2,2,2,2}}, // punpck+psrldq
+ {TTI::SK_Splice, MVT::v4i8, {2,2,2,2}}, // punpck+psrldq
+ {TTI::SK_Splice, MVT::v2i8, {2,2,2,2}}, // punpck+psrldq
+
+ {TTI::SK_PermuteTwoSrc, MVT::v4i16, {2,2,2,2}}, // punpck/pshuflw
+ {TTI::SK_PermuteTwoSrc, MVT::v2i16, {2,2,2,2}}, // punpck/pshuflw
+ {TTI::SK_PermuteTwoSrc, MVT::v8i8, {7,7,7,7}}, // punpck/pshuflw
+ {TTI::SK_PermuteTwoSrc, MVT::v4i8, {4,4,4,4}}, // punpck/pshuflw
+ {TTI::SK_PermuteTwoSrc, MVT::v2i8, {2,2,2,2}}, // punpck
+
+ {TTI::SK_PermuteSingleSrc, MVT::v4i16, {1,1,1,1}}, // pshuflw
+ {TTI::SK_PermuteSingleSrc, MVT::v2i16, {1,1,1,1}}, // pshuflw
+ {TTI::SK_PermuteSingleSrc, MVT::v8i8, {5,5,5,5}}, // punpck/pshuflw
+ {TTI::SK_PermuteSingleSrc, MVT::v4i8, {3,3,3,3}}, // punpck/pshuflw
+ {TTI::SK_PermuteSingleSrc, MVT::v2i8, {1,1,1,1}}, // punpck
};
if (ST->hasSSE2())
if (const auto *Entry =
CostTableLookup(SSE2SubVectorShuffleTbl, Kind, VT.getSimpleVT()))
- return Entry->Cost;
+ if (auto KindCost = Entry->Cost[CostKind])
+ return LT.first * *KindCost;
}
// We are going to permute multiple sources and the result will be in multiple
@@ -1803,57 +1804,57 @@ InstructionCost X86TTIImpl::getShuffleCost(
if (LT.first == 1 && IsInLaneShuffle && IsSingleElementMask)
return TTI::TCC_Basic;
- static const CostTblEntry AVX512VBMIShuffleTbl[] = {
- {TTI::SK_Reverse, MVT::v64i8, 1}, // vpermb
- {TTI::SK_Reverse, MVT::v32i8, 1}, // vpermb
-
- {TTI::SK_PermuteSingleSrc, MVT::v64i8, 1}, // vpermb
- {TTI::SK_PermuteSingleSrc, MVT::v32i8, 1}, // vpermb
-
- {TTI::SK_PermuteTwoSrc, MVT::v64i8, 2}, // vpermt2b
- {TTI::SK_PermuteTwoSrc, MVT::v32i8, 2}, // vpermt2b
- {TTI::SK_PermuteTwoSrc, MVT::v16i8, 2} // vpermt2b
+ static const CostKindTblEntry AVX512VBMIShuffleTbl[] = {
+ { TTI::SK_Reverse, MVT::v64i8, { 1, 1, 1, 1 } }, // vpermb
+ { TTI::SK_Reverse, MVT::v32i8, { 1, 1, 1, 1 } }, // vpermb
+ { TTI::SK_PermuteSingleSrc, MVT::v64i8, { 1, 1, 1, 1 } }, // vpermb
+ { TTI::SK_PermuteSingleSrc, MVT::v32i8, { 1, 1, 1, 1 } }, // vpermb
+ { TTI::SK_PermuteTwoSrc, MVT::v64i8, { 2, 2, 2, 2 } }, // vpermt2b
+ { TTI::SK_PermuteTwoSrc, MVT::v32i8, { 2, 2, 2, 2 } }, // vpermt2b
+ { TTI::SK_PermuteTwoSrc, MVT::v16i8, { 2, 2, 2, 2 } } // vpermt2b
};
if (ST->hasVBMI())
if (const auto *Entry =
CostTableLookup(AVX512VBMIShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
-
- static const CostTblEntry AVX512BWShuffleTbl[] = {
- {TTI::SK_Broadcast, MVT::v32i16, 1}, // vpbroadcastw
- {TTI::SK_Broadcast, MVT::v32f16, 1}, // vpbroadcastw
- {TTI::SK_Broadcast, MVT::v64i8, 1}, // vpbroadcastb
-
- {TTI::SK_Reverse, MVT::v32i16, 2}, // vpermw
- {TTI::SK_Reverse, MVT::v32f16, 2}, // vpermw
- {TTI::SK_Reverse, MVT::v16i16, 2}, // vpermw
- {TTI::SK_Reverse, MVT::v64i8, 2}, // pshufb + vshufi64x2
-
- {TTI::SK_PermuteSingleSrc, MVT::v32i16, 2}, // vpermw
- {TTI::SK_PermuteSingleSrc, MVT::v32f16, 2}, // vpermw
- {TTI::SK_PermuteSingleSrc, MVT::v16i16, 2}, // vpermw
- {TTI::SK_PermuteSingleSrc, MVT::v16f16, 2}, // vpermw
- {TTI::SK_PermuteSingleSrc, MVT::v64i8, 8}, // extend to v32i16
-
- {TTI::SK_PermuteTwoSrc, MVT::v32i16, 2}, // vpermt2w
- {TTI::SK_PermuteTwoSrc, MVT::v32f16, 2}, // vpermt2w
- {TTI::SK_PermuteTwoSrc, MVT::v16i16, 2}, // vpermt2w
- {TTI::SK_PermuteTwoSrc, MVT::v8i16, 2}, // vpermt2w
- {TTI::SK_PermuteTwoSrc, MVT::v64i8, 19}, // 6 * v32i8 + 1
-
- {TTI::SK_Select, MVT::v32i16, 1}, // vblendmw
- {TTI::SK_Select, MVT::v64i8, 1}, // vblendmb
-
- {TTI::SK_Splice, MVT::v32i16, 2}, // vshufi64x2 + palignr
- {TTI::SK_Splice, MVT::v32f16, 2}, // vshufi64x2 + palignr
- {TTI::SK_Splice, MVT::v64i8, 2}, // vshufi64x2 + palignr
+ if (auto KindCost = Entry->Cost[CostKind])
+ return LT.first * *KindCost;
+
+ static const CostKindTblEntry AVX512BWShuffleTbl[] = {
+ { TTI::SK_Broadcast, MVT::v32i16, { 1, 1, 1, 1 } }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v32f16, { 1, 1, 1, 1 } }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v64i8, { 1, 1, 1, 1 } }, // vpbroadcastb
+
+ { TTI::SK_Reverse, MVT::v32i16, { 2, 2, 2, 2 } }, // vpermw
+ { TTI::SK_Reverse, MVT::v32f16, { 2, 2, 2, 2 } }, // vpermw
+ { TTI::SK_Reverse, MVT::v16i16, { 2, 2, 2, 2 } }, // vpermw
+ { TTI::SK_Reverse, MVT::v64i8, { 2, 2, 2, 2 } }, // pshufb + vshufi64x2
+
+ { TTI::SK_PermuteSingleSrc, MVT::v32i16, { 2, 2, 2, 2 } }, // vpermw
+ { TTI::SK_PermuteSingleSrc, MVT::v32f16, { 2, 2, 2, 2 } }, // vpermw
+ { TTI::SK_PermuteSingleSrc, MVT::v16i16, { 2, 2, 2, 2 } }, // vpermw
+ { TTI::SK_PermuteSingleSrc, MVT::v16f16, { 2, 2, 2, 2 } }, // vpermw
+ { TTI::SK_PermuteSingleSrc, MVT::v64i8, { 8, 8, 8, 8 } }, // extend to v32i16
+
+ { TTI::SK_PermuteTwoSrc, MVT::v32i16,{ 2, 2, 2, 2 } }, // vpermt2w
+ { TTI::SK_PermuteTwoSrc, MVT::v32f16,{ 2, 2, 2, 2 } }, // vpermt2w
+ { TTI::SK_PermuteTwoSrc, MVT::v16i16,{ 2, 2, 2, 2 } }, // vpermt2w
+ { TTI::SK_PermuteTwoSrc, MVT::v8i16, { 2, 2, 2, 2 } }, // vpermt2w
+ { TTI::SK_PermuteTwoSrc, MVT::v64i8, { 19, 19, 19, 19 } }, // 6 * v32i8 + 1
+
+ { TTI::SK_Select, MVT::v32i16, { 1, 1, 1, 1 } }, // vblendmw
+ { TTI::SK_Select, MVT::v64i8, { 1, 1, 1, 1 } }, // vblendmb
+
+ { TTI::SK_Splice, MVT::v32i16, { 2, 2, 2, 2 } }, // vshufi64x2 + palignr
+ { TTI::SK_Splice, MVT::v32f16, { 2, 2, 2, 2 } }, // vshufi64x2 + palignr
+ { TTI::SK_Splice, MVT::v64i8, { 2, 2, 2, 2 } }, // vshufi64x2 + palignr
};
if (ST->hasBWI())
if (const auto *Entry =
CostTableLookup(AVX512BWShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ if (auto KindCost = Entry->Cost[CostKind])
+ return LT.first * *KindCost;
static const CostKindTblEntry AVX512ShuffleTbl[] = {
{TTI::SK_Broadcast, MVT::v8f64, { 1, 1, 1, 1 } }, // vbroadcastsd
@@ -1934,286 +1935,288 @@ InstructionCost X86TTIImpl::getShuffleCost(
if (auto KindCost = Entry->Cost[CostKind])
return LT.first * *KindCost;
- static const CostTblEntry AVX2InLaneShuffleTbl[] = {
- {TTI::SK_PermuteSingleSrc, MVT::v16i16, 1}, // vpshufb
- {TTI::SK_PermuteSingleSrc, MVT::v16f16, 1}, // vpshufb
- {TTI::SK_PermuteSingleSrc, MVT::v32i8, 1}, // vpshufb
-
- {TTI::SK_PermuteTwoSrc, MVT::v4f64, 2}, // 2*vshufpd + vblendpd
- {TTI::SK_PermuteTwoSrc, MVT::v8f32, 2}, // 2*vshufps + vblendps
- {TTI::SK_PermuteTwoSrc, MVT::v4i64, 2}, // 2*vpshufd + vpblendd
- {TTI::SK_PermuteTwoSrc, MVT::v8i32, 2}, // 2*vpshufd + vpblendd
- {TTI::SK_PermuteTwoSrc, MVT::v16i16, 2}, // 2*vpshufb + vpor
- {TTI::SK_PermuteTwoSrc, MVT::v16f16, 2}, // 2*vpshufb + vpor
- {TTI::SK_PermuteTwoSrc, MVT::v32i8, 2}, // 2*vpshufb + vpor
+ static const CostKindTblEntry AVX2InLaneShuffleTbl[] = {
+ { TTI::SK_PermuteSingleSrc, MVT::v16i16, { 1, 1, 1, 1 } }, // vpshufb
+ { TTI::SK_PermuteSingleSrc, MVT::v16f16, { 1, 1, 1, 1 } }, // vpshufb
+ { TTI::SK_PermuteSingleSrc, MVT::v32i8, { 1, 1, 1, 1 } }, // vpshufb
+
+ { TTI::SK_PermuteTwoSrc, MVT::v4f64, { 2, 2, 2, 2 } }, // 2*vshufpd + vblendpd
+ { TTI::SK_PermuteTwoSrc, MVT::v8f32, { 2, 2, 2, 2 } }, // 2*vshufps + vblendps
+ { TTI::SK_PermuteTwoSrc, MVT::v4i64, { 2, 2, 2, 2 } }, // 2*vpshufd + vpblendd
+ { TTI::SK_PermuteTwoSrc, MVT::v8i32, { 2, 2, 2, 2 } }, // 2*vpshufd + vpblendd
+ { TTI::SK_PermuteTwoSrc, MVT::v16i16, { 2, 2, 2, 2 } }, // 2*vpshufb + vpor
+ { TTI::SK_PermuteTwoSrc, MVT::v16f16, { 2, 2, 2, 2 } }, // 2*vpshufb + vpor
+ { TTI::SK_PermuteTwoSrc, MVT::v32i8, { 2, 2, 2, 2 } }, // 2*vpshufb + vpor
};
if (IsInLaneShuffle && ST->hasAVX2())
if (const auto *Entry =
CostTableLookup(AVX2InLaneShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
-
- static const CostTblEntry AVX2ShuffleTbl[] = {
- {TTI::SK_Broadcast, MVT::v4f64, 1}, // vbroadcastpd
- {TTI::SK_Broadcast, MVT::v8f32, 1}, // vbroadcastps
- {TTI::SK_Broadcast, MVT::v4i64, 1}, // vpbroadcastq
- {TTI::SK_Broadcast, MVT::v8i32, 1}, // vpbroadcastd
- {TTI::SK_Broadcast, MVT::v16i16, 1}, // vpbroadcastw
- {TTI::SK_Broadcast, MVT::v16f16, 1}, // vpbroadcastw
- {TTI::SK_Broadcast, MVT::v32i8, 1}, // vpbroadcastb
-
- {TTI::SK_Reverse, MVT::v4f64, 1}, // vpermpd
- {TTI::SK_Reverse, MVT::v8f32, 1}, // vpermps
- {TTI::SK_Reverse, MVT::v4i64, 1}, // vpermq
- {TTI::SK_Reverse, MVT::v8i32, 1}, // vpermd
- {TTI::SK_Reverse, MVT::v16i16, 2}, // vperm2i128 + pshufb
- {TTI::SK_Reverse, MVT::v16f16, 2}, // vperm2i128 + pshufb
- {TTI::SK_Reverse, MVT::v32i8, 2}, // vperm2i128 + pshufb
-
- {TTI::SK_Select, MVT::v16i16, 1}, // vpblendvb
- {TTI::SK_Select, MVT::v16f16, 1}, // vpblendvb
- {TTI::SK_Select, MVT::v32i8, 1}, // vpblendvb
-
- {TTI::SK_Splice, MVT::v8i32, 2}, // vperm2i128 + vpalignr
- {TTI::SK_Splice, MVT::v8f32, 2}, // vperm2i128 + vpalignr
- {TTI::SK_Splice, MVT::v16i16, 2}, // vperm2i128 + vpalignr
- {TTI::SK_Splice, MVT::v16f16, 2}, // vperm2i128 + vpalignr
- {TTI::SK_Splice, MVT::v32i8, 2}, // vperm2i128 + vpalignr
-
- {TTI::SK_PermuteSingleSrc, MVT::v4f64, 1}, // vpermpd
- {TTI::SK_PermuteSingleSrc, MVT::v8f32, 1}, // vpermps
- {TTI::SK_PermuteSingleSrc, MVT::v4i64, 1}, // vpermq
- {TTI::SK_PermuteSingleSrc, MVT::v8i32, 1}, // vpermd
- {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, // vperm2i128 + 2*vpshufb
- // + vpblendvb
- {TTI::SK_PermuteSingleSrc, MVT::v16f16, 4}, // vperm2i128 + 2*vpshufb
- // + vpblendvb
- {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4}, // vperm2i128 + 2*vpshufb
- // + vpblendvb
-
- {TTI::SK_PermuteTwoSrc, MVT::v4f64, 3}, // 2*vpermpd + vblendpd
- {TTI::SK_PermuteTwoSrc, MVT::v8f32, 3}, // 2*vpermps + vblendps
- {TTI::SK_PermuteTwoSrc, MVT::v4i64, 3}, // 2*vpermq + vpblendd
- {TTI::SK_PermuteTwoSrc, MVT::v8i32, 3}, // 2*vpermd + vpblendd
- {TTI::SK_PermuteTwoSrc, MVT::v16i16, 7}, // 2*vperm2i128 + 4*vpshufb
- // + vpblendvb
- {TTI::SK_PermuteTwoSrc, MVT::v16f16, 7}, // 2*vperm2i128 + 4*vpshufb
- // + vpblendvb
- {TTI::SK_PermuteTwoSrc, MVT::v32i8, 7}, // 2*vperm2i128 + 4*vpshufb
- // + vpblendvb
+ if (auto KindCost = Entry->Cost[CostKind])
+ return LT.first * *KindCost;
+
+ static const CostKindTblEntry AVX2ShuffleTbl[] = {
+ { TTI::SK_Broadcast, MVT::v4f64, { 1, 1, 1, 1 } }, // vbroadcastpd
+ { TTI::SK_Broadcast, MVT::v8f32, { 1, 1, 1, 1 } }, // vbroadcastps
+ { TTI::SK_Broadcast, MVT::v4i64, { 1, 1, 1, 1 } }, // vpbroadcastq
+ { TTI::SK_Broadcast, MVT::v8i32, { 1, 1, 1, 1 } }, // vpbroadcastd
+ { TTI::SK_Broadcast, MVT::v16i16, { 1, 1, 1, 1 } }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v16f16, { 1, 1, 1, 1 } }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v32i8, { 1, 1, 1, 1 } }, // vpbroadcastb
+
+ { TTI::SK_Reverse, MVT::v4f64, { 1, 1, 1, 1 } }, // vpermpd
+ { TTI::SK_Reverse, MVT::v8f32, { 1, 1, 1, 1 } }, // vpermps
+ { TTI::SK_Reverse, MVT::v4i64, { 1, 1, 1, 1 } }, // vpermq
+ { TTI::SK_Reverse, MVT::v8i32, { 1, 1, 1, 1 } }, // vpermd
+ { TTI::SK_Reverse, MVT::v16i16, { 2, 2, 2, 2 } }, // vperm2i128 + pshufb
+ { TTI::SK_Reverse, MVT::v16f16, { 2, 2, 2, 2 } }, // vperm2i128 + pshufb
+ { TTI::SK_Reverse, MVT::v32i8, { 2, 2, 2, 2 } }, // vperm2i128 + pshufb
+
+ { TTI::SK_Select, MVT::v16i16, { 1, 1, 1, 1 } }, // vpblendvb
+ { TTI::SK_Select, MVT::v16f16, { 1, 1, 1, 1 } }, // vpblendvb
+ { TTI::SK_Select, MVT::v32i8, { 1, 1, 1, 1 } }, // vpblendvb
+
+ { TTI::SK_Splice, MVT::v8i32, { 2, 2, 2, 2 } }, // vperm2i128 + vpalignr
+ { TTI::SK_Splice, MVT::v8f32, { 2, 2, 2, 2 } }, // vperm2i128 + vpalignr
+ { TTI::SK_Splice, MVT::v16i16, { 2, 2, 2, 2 } }, // vperm2i128 + vpalignr
+ { TTI::SK_Splice, MVT::v16f16, { 2, 2, 2, 2 } }, // vperm2i128 + vpalignr
+ { TTI::SK_Splice, MVT::v32i8, { 2, 2, 2, 2 } }, // vperm2i128 + vpalignr
+
+ { TTI::SK_PermuteSingleSrc, MVT::v4f64, { 1, 1, 1, 1 } }, // vpermpd
+ { TTI::SK_PermuteSingleSrc, MVT::v8f32, { 1, 1, 1, 1 } }, // vpermps
+ { TTI::SK_PermuteSingleSrc, MVT::v4i64, { 1, 1, 1, 1 } }, // vpermq
+ { TTI::SK_PermuteSingleSrc, MVT::v8i32, { 1, 1, 1, 1 } }, // vpermd
+ { TTI::SK_PermuteSingleSrc, MVT::v16i16, { 4, 4, 4, 4 } },
+ { TTI::SK_PermuteSingleSrc, MVT::v16f16, { 4, 4, 4, 4 } },
+ { TTI::SK_PermuteSingleSrc, MVT::v32i8, { 4, 4, 4, 4 } },
+
+ { TTI::SK_PermuteTwoSrc, MVT::v4f64, { 3, 3, 3, 3 } }, // 2*vpermpd + vblendpd
+ { TTI::SK_PermuteTwoSrc, MVT::v8f32, { 3, 3, 3, 3 } }, // 2*vpermps + vblendps
+ { TTI::SK_PermuteTwoSrc, MVT::v4i64, { 3, 3, 3, 3 } }, // 2*vpermq + vpblendd
+ { TTI::SK_PermuteTwoSrc, MVT::v8i32, { 3, 3, 3, 3 } }, // 2*vpermd + vpblendd
+ { TTI::SK_PermuteTwoSrc, MVT::v16i16, { 7, 7, 7, 7 } },
+ { TTI::SK_PermuteTwoSrc, MVT::v16f16, { 7, 7, 7, 7 } },
+ { TTI::SK_PermuteTwoSrc, MVT::v32i8, { 7, 7, 7, 7 } },
};
if (ST->hasAVX2())
if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
-
- static const CostTblEntry XOPShuffleTbl[] = {
- {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2}, // vperm2f128 + vpermil2pd
- {TTI::SK_PermuteSingleSrc, MVT::v8f32, 2}, // vperm2f128 + vpermil2ps
- {TTI::SK_PermuteSingleSrc, MVT::v4i64, 2}, // vperm2f128 + vpermil2pd
- {TTI::SK_PermuteSingleSrc, MVT::v8i32, 2}, // vperm2f128 + vpermil2ps
- {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, // vextractf128 + 2*vpperm
- // + vinsertf128
- {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4}, // vextractf128 + 2*vpperm
- // + vinsertf128
-
- {TTI::SK_PermuteTwoSrc, MVT::v16i16, 9}, // 2*vextractf128 + 6*vpperm
- // + vinsertf128
- {TTI::SK_PermuteTwoSrc, MVT::v8i16, 1}, // vpperm
- {TTI::SK_PermuteTwoSrc, MVT::v32i8, 9}, // 2*vextractf128 + 6*vpperm
- // + vinsertf128
- {TTI::SK_PermuteTwoSrc, MVT::v16i8, 1}, // vpperm
+ if (auto KindCost = Entry->Cost[CostKind])
+ return LT.first * *KindCost;
+
+ static const CostKindTblEntry XOPShuffleTbl[] = {
+ { TTI::SK_PermuteSingleSrc, MVT::v4f64, { 2, 2, 2, 2 } }, // vperm2f128 + vpermil2pd
+ { TTI::SK_PermuteSingleSrc, MVT::v8f32, { 2, 2, 2, 2 } }, // vperm2f128 + vpermil2ps
+ { TTI::SK_PermuteSingleSrc, MVT::v4i64, { 2, 2, 2, 2 } }, // vperm2f128 + vpermil2pd
+ { TTI::SK_PermuteSingleSrc, MVT::v8i32, { 2, 2, 2, 2 } }, // vperm2f128 + vpermil2ps
+ { TTI::SK_PermuteSingleSrc, MVT::v16i16,{ 4, 4, 4, 4 } }, // vextractf128 + 2*vpperm
+ // + vinsertf128
+ { TTI::SK_PermuteSingleSrc, MVT::v32i8, { 4, 4, 4, 4 } }, // vextractf128 + 2*vpperm
+ // + vinsertf128
+
+ { TTI::SK_PermuteTwoSrc, MVT::v16i16, { 9, 9, 9, 9 } }, // 2*vextractf128 + 6*vpperm
+ // + vinsertf128
+
+ { TTI::SK_PermuteTwoSrc, MVT::v8i16, { 1, 1, 1, 1 } }, // vpperm
+ { TTI::SK_PermuteTwoSrc, MVT::v32i8, { 9, 9, 9, 9 } }, // 2*vextractf128 + 6*vpperm
+ // + vinsertf128
+ { TTI::SK_PermuteTwoSrc, MVT::v16i8, { 1, 1, 1, 1 } }, // vpperm
};
if (ST->hasXOP())
if (const auto *Entry = CostTableLookup(XOPShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
-
- static const CostTblEntry AVX1InLaneShuffleTbl[] = {
- {TTI::SK_PermuteSingleSrc, MVT::v4f64, 1}, // vpermilpd
- {TTI::SK_PermuteSingleSrc, MVT::v4i64, 1}, // vpermilpd
- {TTI::SK_PermuteSingleSrc, MVT::v8f32, 1}, // vpermilps
- {TTI::SK_PermuteSingleSrc, MVT::v8i32, 1}, // vpermilps
-
- {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, // vextractf128 + 2*pshufb
- // + vpor + vinsertf128
- {TTI::SK_PermuteSingleSrc, MVT::v16f16, 4}, // vextractf128 + 2*pshufb
- // + vpor + vinsertf128
- {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4}, // vextractf128 + 2*pshufb
- // + vpor + vinsertf128
-
- {TTI::SK_PermuteTwoSrc, MVT::v4f64, 2}, // 2*vshufpd + vblendpd
- {TTI::SK_PermuteTwoSrc, MVT::v8f32, 2}, // 2*vshufps + vblendps
- {TTI::SK_PermuteTwoSrc, MVT::v4i64, 2}, // 2*vpermilpd + vblendpd
- {TTI::SK_PermuteTwoSrc, MVT::v8i32, 2}, // 2*vpermilps + vblendps
- {TTI::SK_PermuteTwoSrc, MVT::v16i16, 9}, // 2*vextractf128 + 4*pshufb
- // + 2*vpor + vinsertf128
- {TTI::SK_PermuteTwoSrc, MVT::v16f16, 9}, // 2*vextractf128 + 4*pshufb
- // + 2*vpor + vinsertf128
- {TTI::SK_PermuteTwoSrc, MVT::v32i8, 9}, // 2*vextractf128 + 4*pshufb
- // + 2*vpor + vinsertf128
+ if (auto KindCost = Entry->Cost[CostKind])
+ return LT.first * *KindCost;
+
+ static const CostKindTblEntry AVX1InLaneShuffleTbl[] = {
+ { TTI::SK_PermuteSingleSrc, MVT::v4f64, { 1, 1, 1, 1 } }, // vpermilpd
+ { TTI::SK_PermuteSingleSrc, MVT::v4i64, { 1, 1, 1, 1 } }, // vpermilpd
+ { TTI::SK_PermuteSingleSrc, MVT::v8f32, { 1, 1, 1, 1 } }, // vpermilps
+ { TTI::SK_PermuteSingleSrc, MVT::v8i32, { 1, 1, 1, 1 } }, // vpermilps
+
+ { TTI::SK_PermuteSingleSrc, MVT::v16i16, { 4, 4, 4, 4 } }, // vextractf128 + 2*pshufb
+ // + vpor + vinsertf128
+ { TTI::SK_PermuteSingleSrc, MVT::v16f16, { 4, 4, 4, 4 } }, // vextractf128 + 2*pshufb
+ // + vpor + vinsertf128
+ { TTI::SK_PermuteSingleSrc, MVT::v32i8, { 4, 4, 4, 4 } }, // vextractf128 + 2*pshufb
+ // + vpor + vinsertf128
+
+ { TTI::SK_PermuteTwoSrc, MVT::v4f64, { 2, 2, 2, 2 } }, // 2*vshufpd + vblendpd
+ { TTI::SK_PermuteTwoSrc, MVT::v8f32, { 2, 2, 2, 2 } }, // 2*vshufps + vblendps
+ { TTI::SK_PermuteTwoSrc, MVT::v4i64, { 2, 2, 2, 2 } }, // 2*vpermilpd + vblendpd
+ { TTI::SK_PermuteTwoSrc, MVT::v8i32, { 2, 2, 2, 2 } }, // 2*vpermilps + vblendps
+ { TTI::SK_PermuteTwoSrc, MVT::v16i16, { 9, 9, 9, 9 } }, // 2*vextractf128 + 4*pshufb
+ // + 2*vpor + vinsertf128
+ { TTI::SK_PermuteTwoSrc, MVT::v16f16, { 9, 9, 9, 9 } }, // 2*vextractf128 + 4*pshufb
+ // + 2*vpor + vinsertf128
+ { TTI::SK_PermuteTwoSrc, MVT::v32i8, { 9, 9, 9, 9 } }, // 2*vextractf128 + 4*pshufb
+ // + 2*vpor + vinsertf128
};
if (IsInLaneShuffle && ST->hasAVX())
if (const auto *Entry =
CostTableLookup(AVX1InLaneShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
-
- static const CostTblEntry AVX1ShuffleTbl[] = {
- {TTI::SK_Broadcast, MVT::v4f64, 2}, // vperm2f128 + vpermilpd
- {TTI::SK_Broadcast, MVT::v8f32, 2}, // vperm2f128 + vpermilps
- {TTI::SK_Broadcast, MVT::v4i64, 2}, // vperm2f128 + vpermilpd
- {TTI::SK_Broadcast, MVT::v8i32, 2}, // vperm2f128 + vpermilps
- {TTI::SK_Broadcast, MVT::v16i16, 3}, // vpshuflw + vpshufd + vinsertf128
- {TTI::SK_Broadcast, MVT::v16f16, 3}, // vpshuflw + vpshufd + vinsertf128
- {TTI::SK_Broadcast, MVT::v32i8, 2}, // vpshufb + vinsertf128
-
- {TTI::SK_Reverse, MVT::v4f64, 2}, // vperm2f128 + vpermilpd
- {TTI::SK_Reverse, MVT::v8f32, 2}, // vperm2f128 + vpermilps
- {TTI::SK_Reverse, MVT::v4i64, 2}, // vperm2f128 + vpermilpd
- {TTI::SK_Reverse, MVT::v8i32, 2}, // vperm2f128 + vpermilps
- {TTI::SK_Reverse, MVT::v16i16, 4}, // vextractf128 + 2*pshufb
- // + vinsertf128
- {TTI::SK_Reverse, MVT::v16f16, 4}, // vextractf128 + 2*pshufb
- // + vinsertf128
- {TTI::SK_Reverse, MVT::v32i8, 4}, // vextractf128 + 2*pshufb
- // + vinsertf128
-
- {TTI::SK_Select, MVT::v4i64, 1}, // vblendpd
- {TTI::SK_Select, MVT::v4f64, 1}, // vblendpd
- {TTI::SK_Select, MVT::v8i32, 1}, // vblendps
- {TTI::SK_Select, MVT::v8f32, 1}, // vblendps
- {TTI::SK_Select, MVT::v16i16, 3}, // vpand + vpandn + vpor
- {TTI::SK_Select, MVT::v16f16, 3}, // vpand + vpandn + vpor
- {TTI::SK_Select, MVT::v32i8, 3}, // vpand + vpandn + vpor
-
- {TTI::SK_Splice, MVT::v4i64, 2}, // vperm2f128 + shufpd
- {TTI::SK_Splice, MVT::v4f64, 2}, // vperm2f128 + shufpd
- {TTI::SK_Splice, MVT::v8i32, 4}, // 2*vperm2f128 + 2*vshufps
- {TTI::SK_Splice, MVT::v8f32, 4}, // 2*vperm2f128 + 2*vshufps
- {TTI::SK_Splice, MVT::v16i16, 5}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
- {TTI::SK_Splice, MVT::v16f16, 5}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
- {TTI::SK_Splice, MVT::v32i8, 5}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
-
- {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2}, // vperm2f128 + vshufpd
- {TTI::SK_PermuteSingleSrc, MVT::v4i64, 2}, // vperm2f128 + vshufpd
- {TTI::SK_PermuteSingleSrc, MVT::v8f32, 4}, // 2*vperm2f128 + 2*vshufps
- {TTI::SK_PermuteSingleSrc, MVT::v8i32, 4}, // 2*vperm2f128 + 2*vshufps
- {TTI::SK_PermuteSingleSrc, MVT::v16i16, 8}, // vextractf128 + 4*pshufb
- // + 2*por + vinsertf128
- {TTI::SK_PermuteSingleSrc, MVT::v16f16, 8}, // vextractf128 + 4*pshufb
- // + 2*por + vinsertf128
- {TTI::SK_PermuteSingleSrc, MVT::v32i8, 8}, // vextractf128 + 4*pshufb
- // + 2*por + vinsertf128
-
- {TTI::SK_PermuteTwoSrc, MVT::v4f64, 3}, // 2*vperm2f128 + vshufpd
- {TTI::SK_PermuteTwoSrc, MVT::v4i64, 3}, // 2*vperm2f128 + vshufpd
- {TTI::SK_PermuteTwoSrc, MVT::v8f32, 4}, // 2*vperm2f128 + 2*vshufps
- {TTI::SK_PermuteTwoSrc, MVT::v8i32, 4}, // 2*vperm2f128 + 2*vshufps
- {TTI::SK_PermuteTwoSrc, MVT::v16i16, 15}, // 2*vextractf128 + 8*pshufb
- // + 4*por + vinsertf128
- {TTI::SK_PermuteTwoSrc, MVT::v16f16, 15}, // 2*vextractf128 + 8*pshufb
- // + 4*por + vinsertf128
- {TTI::SK_PermuteTwoSrc, MVT::v32i8, 15}, // 2*vextractf128 + 8*pshufb
- // + 4*por + vinsertf128
+ if (auto KindCost = Entry->Cost[CostKind])
+ return LT.first * *KindCost;
+
+ static const CostKindTblEntry AVX1ShuffleTbl[] = {
+ {TTI::SK_Broadcast, MVT::v4f64, {2,2,2,2}}, // vperm2f128 + vpermilpd
+ {TTI::SK_Broadcast, MVT::v8f32, {2,2,2,2}}, // vperm2f128 + vpermilps
+ {TTI::SK_Broadcast, MVT::v4i64, {2,2,2,2}}, // vperm2f128 + vpermilpd
+ {TTI::SK_Broadcast, MVT::v8i32, {2,2,2,2}}, // vperm2f128 + vpermilps
+ {TTI::SK_Broadcast, MVT::v16i16, {3,3,3,3}}, // vpshuflw + vpshufd + vinsertf128
+ {TTI::SK_Broadcast, MVT::v16f16, {3,3,3,3}}, // vpshuflw + vpshufd + vinsertf128
+ {TTI::SK_Broadcast, MVT::v32i8, {2,2,2,2}}, // vpshufb + vinsertf128
+
+ {TTI::SK_Reverse, MVT::v4f64, {2,2,2,2}}, // vperm2f128 + vpermilpd
+ {TTI::SK_Reverse, MVT::v8f32, {2,2,2,2}}, // vperm2f128 + vpermilps
+ {TTI::SK_Reverse, MVT::v4i64, {2,2,2,2}}, // vperm2f128 + vpermilpd
+ {TTI::SK_Reverse, MVT::v8i32, {2,2,2,2}}, // vperm2f128 + vpermilps
+ {TTI::SK_Reverse, MVT::v16i16, {4,4,4,4}}, // vextractf128 + 2*pshufb
+ // + vinsertf128
+ {TTI::SK_Reverse, MVT::v16f16, {4,4,4,4}}, // vextractf128 + 2*pshufb
+ // + vinsertf128
+ {TTI::SK_Reverse, MVT::v32i8, {4,4,4,4}}, // vextractf128 + 2*pshufb
+ // + vinsertf128
+
+ {TTI::SK_Select, MVT::v4i64, {1,1,1,1}}, // vblendpd
+ {TTI::SK_Select, MVT::v4f64, {1,1,1,1}}, // vblendpd
+ {TTI::SK_Select, MVT::v8i32, {1,1,1,1}}, // vblendps
+ {TTI::SK_Select, MVT::v8f32, {1,1,1,1}}, // vblendps
+ {TTI::SK_Select, MVT::v16i16, {3,3,3,3}}, // vpand + vpandn + vpor
+ {TTI::SK_Select, MVT::v16f16, {3,3,3,3}}, // vpand + vpandn + vpor
+ {TTI::SK_Select, MVT::v32i8, {3,3,3,3}}, // vpand + vpandn + vpor
+
+ {TTI::SK_Splice, MVT::v4i64, {2,2,2,2}}, // vperm2f128 + shufpd
+ {TTI::SK_Splice, MVT::v4f64, {2,2,2,2}}, // vperm2f128 + shufpd
+ {TTI::SK_Splice, MVT::v8i32, {4,4,4,4}}, // 2*vperm2f128 + 2*vshufps
+ {TTI::SK_Splice, MVT::v8f32, {4,4,4,4}}, // 2*vperm2f128 + 2*vshufps
+ {TTI::SK_Splice, MVT::v16i16, {5,5,5,5}}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
+ {TTI::SK_Splice, MVT::v16f16, {5,5,5,5}}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
+ {TTI::SK_Splice, MVT::v32i8, {5,5,5,5}}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
+
+ {TTI::SK_PermuteSingleSrc, MVT::v4f64, {2,2,2,2}}, // vperm2f128 + vshufpd
+ {TTI::SK_PermuteSingleSrc, MVT::v4i64, {2,2,2,2}}, // vperm2f128 + vshufpd
+ {TTI::SK_PermuteSingleSrc, MVT::v8f32, {4,4,4,4}}, // 2*vperm2f128 + 2*vshufps
+ {TTI::SK_PermuteSingleSrc, MVT::v8i32, {4,4,4,4}}, // 2*vperm2f128 + 2*vshufps
+ {TTI::SK_PermuteSingleSrc, MVT::v16i16,{8,8,8,8}}, // vextractf128 + 4*pshufb
+ // + 2*por + vinsertf128
+ {TTI::SK_PermuteSingleSrc, MVT::v16f16,{8,8,8,8}}, // vextractf128 + 4*pshufb
+ // + 2*por + vinsertf128
+ {TTI::SK_PermuteSingleSrc, MVT::v32i8, {8,8,8,8}}, // vextractf128 + 4*pshufb
+ // + 2*por + vinsertf128
+
+ {TTI::SK_PermuteTwoSrc, MVT::v4f64, {3,3,3,3}}, // 2*vperm2f128 + vshufpd
+ {TTI::SK_PermuteTwoSrc, MVT::v4i64, {3,3,3,3}}, // 2*vperm2f128 + vshufpd
+ {TTI::SK_PermuteTwoSrc, MVT::v8f32, {4,4,4,4}}, // 2*vperm2f128 + 2*vshufps
+ {TTI::SK_PermuteTwoSrc, MVT::v8i32, {4,4,4,4}}, // 2*vperm2f128 + 2*vshufps
+ {TTI::SK_PermuteTwoSrc, MVT::v16i16,{15,15,15,15}}, // 2*vextractf128 + 8*pshufb
+ // + 4*por + vinsertf128
+ {TTI::SK_PermuteTwoSrc, MVT::v16f16,{15,15,15,15}}, // 2*vextractf128 + 8*pshufb
+ // + 4*por + vinsertf128
+ {TTI::SK_PermuteTwoSrc, MVT::v32i8, {15,15,15,15}}, // 2*vextractf128 + 8*pshufb
+ // + 4*por + vinsertf128
};
if (ST->hasAVX())
if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
-
- static const CostTblEntry SSE41ShuffleTbl[] = {
- {TTI::SK_Select, MVT::v2i64, 1}, // pblendw
- {TTI::SK_Select, MVT::v2f64, 1}, // movsd
- {TTI::SK_Select, MVT::v4i32, 1}, // pblendw
- {TTI::SK_Select, MVT::v4f32, 1}, // blendps
- {TTI::SK_Select, MVT::v8i16, 1}, // pblendw
- {TTI::SK_Select, MVT::v8f16, 1}, // pblendw
- {TTI::SK_Select, MVT::v16i8, 1} // pblendvb
+ if (auto KindCost = Entry->Cost[CostKind])
+ return LT.first * *KindCost;
+
+ static const CostKindTblEntry SSE41ShuffleTbl[] = {
+ {TTI::SK_Select, MVT::v2i64, {1,1,1,1}}, // pblendw
+ {TTI::SK_Select, MVT::v2f64, {1,1,1,1}}, // movsd
+ {TTI::SK_Select, MVT::v4i32, {1,1,1,1}}, // pblendw
+ {TTI::SK_Select, MVT::v4f32, {1,1,1,1}}, // blendps
+ {TTI::SK_Select, MVT::v8i16, {1,1,1,1}}, // pblendw
+ {TTI::SK_Select, MVT::v8f16, {1,1,1,1}}, // pblendw
+ {TTI::SK_Select, MVT::v16i8, {1,1,1,1}} // pblendvb
};
if (ST->hasSSE41())
if (const auto *Entry = CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
-
- static const CostTblEntry SSSE3ShuffleTbl[] = {
- {TTI::SK_Broadcast, MVT::v8i16, 1}, // pshufb
- {TTI::SK_Broadcast, MVT::v8f16, 1}, // pshufb
- {TTI::SK_Broadcast, MVT::v16i8, 1}, // pshufb
-
- {TTI::SK_Reverse, MVT::v8i16, 1}, // pshufb
- {TTI::SK_Reverse, MVT::v8f16, 1}, // pshufb
- {TTI::SK_Reverse, MVT::v16i8, 1}, // pshufb
-
- {TTI::SK_Select, MVT::v8i16, 3}, // 2*pshufb + por
- {TTI::SK_Select, MVT::v8f16, 3}, // 2*pshufb + por
- {TTI::SK_Select, MVT::v16i8, 3}, // 2*pshufb + por
-
- {TTI::SK_Splice, MVT::v4i32, 1}, // palignr
- {TTI::SK_Splice, MVT::v4f32, 1}, // palignr
- {TTI::SK_Splice, MVT::v8i16, 1}, // palignr
- {TTI::SK_Splice, MVT::v8f16, 1}, // palignr
- {TTI::SK_Splice, MVT::v16i8, 1}, // palignr
-
- {TTI::SK_PermuteSingleSrc, MVT::v8i16, 1}, // pshufb
- {TTI::SK_PermuteSingleSrc, MVT::v8f16, 1}, // pshufb
- {TTI::SK_PermuteSingleSrc, MVT::v16i8, 1}, // pshufb
-
- {TTI::SK_PermuteTwoSrc, MVT::v8i16, 3}, // 2*pshufb + por
- {TTI::SK_PermuteTwoSrc, MVT::v8f16, 3}, // 2*pshufb + por
- {TTI::SK_PermuteTwoSrc, MVT::v16i8, 3}, // 2*pshufb + por
+ if (auto KindCost = Entry->Cost[CostKind])
+ return LT.first * *KindCost;
+
+ static const CostKindTblEntry SSSE3ShuffleTbl[] = {
+ {TTI::SK_Broadcast, MVT::v8i16, {1, 1, 1, 1}}, // pshufb
+ {TTI::SK_Broadcast, MVT::v8f16, {1, 1, 1, 1}}, // pshufb
+ {TTI::SK_Broadcast, MVT::v16i8, {1, 1, 1, 1}}, // pshufb
+
+ {TTI::SK_Reverse, MVT::v8i16, {1, 1, 1, 1}}, // pshufb
+ {TTI::SK_Reverse, MVT::v8f16, {1, 1, 1, 1}}, // pshufb
+ {TTI::SK_Reverse, MVT::v16i8, {1, 1, 1, 1}}, // pshufb
+
+ {TTI::SK_Select, MVT::v8i16, {3, 3, 3, 3}}, // 2*pshufb + por
+ {TTI::SK_Select, MVT::v8f16, {3, 3, 3, 3}}, // 2*pshufb + por
+ {TTI::SK_Select, MVT::v16i8, {3, 3, 3, 3}}, // 2*pshufb + por
+
+ {TTI::SK_Splice, MVT::v4i32, {1, 1, 1, 1}}, // palignr
+ {TTI::SK_Splice, MVT::v4f32, {1, 1, 1, 1}}, // palignr
+ {TTI::SK_Splice, MVT::v8i16, {1, 1, 1, 1}}, // palignr
+ {TTI::SK_Splice, MVT::v8f16, {1, 1, 1, 1}}, // palignr
+ {TTI::SK_Splice, MVT::v16i8, {1, 1, 1, 1}}, // palignr
+
+ {TTI::SK_PermuteSingleSrc, MVT::v8i16, {1, 1, 1, 1}}, // pshufb
+ {TTI::SK_PermuteSingleSrc, MVT::v8f16, {1, 1, 1, 1}}, // pshufb
+ {TTI::SK_PermuteSingleSrc, MVT::v16i8, {1, 1, 1, 1}}, // pshufb
+
+ {TTI::SK_PermuteTwoSrc, MVT::v8i16, {3, 3, 3, 3}}, // 2*pshufb + por
+ {TTI::SK_PermuteTwoSrc, MVT::v8f16, {3, 3, 3, 3}}, // 2*pshufb + por
+ {TTI::SK_PermuteTwoSrc, MVT::v16i8, {3, 3, 3, 3}}, // 2*pshufb + por
};
if (ST->hasSSSE3())
if (const auto *Entry = CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
-
- static const CostTblEntry SSE2ShuffleTbl[] = {
- {TTI::SK_Broadcast, MVT::v2f64, 1}, // shufpd
- {TTI::SK_Broadcast, MVT::v2i64, 1}, // pshufd
- {TTI::SK_Broadcast, MVT::v4i32, 1}, // pshufd
- {TTI::SK_Broadcast, MVT::v8i16, 2}, // pshuflw + pshufd
- {TTI::SK_Broadcast, MVT::v8f16, 2}, // pshuflw + pshufd
- {TTI::SK_Broadcast, MVT::v16i8, 3}, // unpck + pshuflw + pshufd
-
- {TTI::SK_Reverse, MVT::v2f64, 1}, // shufpd
- {TTI::SK_Reverse, MVT::v2i64, 1}, // pshufd
- {TTI::SK_Reverse, MVT::v4i32, 1}, // pshufd
- {TTI::SK_Reverse, MVT::v8i16, 3}, // pshuflw + pshufhw + pshufd
- {TTI::SK_Reverse, MVT::v8f16, 3}, // pshuflw + pshufhw + pshufd
- {TTI::SK_Reverse, MVT::v16i8, 9}, // 2*pshuflw + 2*pshufhw
- // + 2*pshufd + 2*unpck + packus
-
- {TTI::SK_Select, MVT::v2i64, 1}, // movsd
- {TTI::SK_Select, MVT::v2f64, 1}, // movsd
- {TTI::SK_Select, MVT::v4i32, 2}, // 2*shufps
- {TTI::SK_Select, MVT::v8i16, 3}, // pand + pandn + por
- {TTI::SK_Select, MVT::v8f16, 3}, // pand + pandn + por
- {TTI::SK_Select, MVT::v16i8, 3}, // pand + pandn + por
-
- {TTI::SK_Splice, MVT::v2i64, 1}, // shufpd
- {TTI::SK_Splice, MVT::v2f64, 1}, // shufpd
- {TTI::SK_Splice, MVT::v4i32, 2}, // 2*{unpck,movsd,pshufd}
- {TTI::SK_Splice, MVT::v8i16, 3}, // psrldq + psrlldq + por
- {TTI::SK_Splice, MVT::v8f16, 3}, // psrldq + psrlldq + por
- {TTI::SK_Splice, MVT::v16i8, 3}, // psrldq + psrlldq + por
-
- {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1}, // shufpd
- {TTI::SK_PermuteSingleSrc, MVT::v2i64, 1}, // pshufd
- {TTI::SK_PermuteSingleSrc, MVT::v4i32, 1}, // pshufd
- {TTI::SK_PermuteSingleSrc, MVT::v8i16, 5}, // 2*pshuflw + 2*pshufhw
- // + pshufd/unpck
- {TTI::SK_PermuteSingleSrc, MVT::v8f16, 5}, // 2*pshuflw + 2*pshufhw
- // + pshufd/unpck
- { TTI::SK_PermuteSingleSrc, MVT::v16i8, 10 }, // 2*pshuflw + 2*pshufhw
- // + 2*pshufd + 2*unpck + 2*packus
-
- { TTI::SK_PermuteTwoSrc, MVT::v2f64, 1 }, // shufpd
- { TTI::SK_PermuteTwoSrc, MVT::v2i64, 1 }, // shufpd
- { TTI::SK_PermuteTwoSrc, MVT::v4i32, 2 }, // 2*{unpck,movsd,pshufd}
- { TTI::SK_PermuteTwoSrc, MVT::v8i16, 8 }, // blend+permute
- { TTI::SK_PermuteTwoSrc, MVT::v8f16, 8 }, // blend+permute
- { TTI::SK_PermuteTwoSrc, MVT::v16i8, 13 }, // blend+permute
+ if (auto KindCost = Entry->Cost[CostKind])
+ return LT.first * *KindCost;
+
+ static const CostKindTblEntry SSE2ShuffleTbl[] = {
+ {TTI::SK_Broadcast, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
+ {TTI::SK_Broadcast, MVT::v2i64, {1, 1, 1, 1}}, // pshufd
+ {TTI::SK_Broadcast, MVT::v4i32, {1, 1, 1, 1}}, // pshufd
+ {TTI::SK_Broadcast, MVT::v8i16, {2, 2, 2, 2}}, // pshuflw + pshufd
+ {TTI::SK_Broadcast, MVT::v8f16, {2, 2, 2, 2}}, // pshuflw + pshufd
+ {TTI::SK_Broadcast, MVT::v16i8, {3, 3, 3, 3}}, // unpck + pshuflw + pshufd
+
+ {TTI::SK_Reverse, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
+ {TTI::SK_Reverse, MVT::v2i64, {1, 1, 1, 1}}, // pshufd
+ {TTI::SK_Reverse, MVT::v4i32, {1, 1, 1, 1}}, // pshufd
+ {TTI::SK_Reverse, MVT::v8i16, {3, 3, 3, 3}}, // pshuflw + pshufhw + pshufd
+ {TTI::SK_Reverse, MVT::v8f16, {3, 3, 3, 3}}, // pshuflw + pshufhw + pshufd
+ {TTI::SK_Reverse, MVT::v16i8, {9, 9, 9, 9}}, // 2*pshuflw + 2*pshufhw
+ // + 2*pshufd + 2*unpck + packus
+
+ {TTI::SK_Select, MVT::v2i64, {1, 1, 1, 1}}, // movsd
+ {TTI::SK_Select, MVT::v2f64, {1, 1, 1, 1}}, // movsd
+ {TTI::SK_Select, MVT::v4i32, {2, 2, 2, 2}}, // 2*shufps
+ {TTI::SK_Select, MVT::v8i16, {3, 3, 3, 3}}, // pand + pandn + por
+ {TTI::SK_Select, MVT::v8f16, {3, 3, 3, 3}}, // pand + pandn + por
+ {TTI::SK_Select, MVT::v16i8, {3, 3, 3, 3}}, // pand + pandn + por
+
+ {TTI::SK_Splice, MVT::v2i64, {1, 1, 1, 1}}, // shufpd
+ {TTI::SK_Splice, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
+ {TTI::SK_Splice, MVT::v4i32, {2, 2, 2, 2}}, // 2*{unpck,movsd,pshufd}
+ {TTI::SK_Splice, MVT::v8i16, {3, 3, 3, 3}}, // psrldq + psrlldq + por
+ {TTI::SK_Splice, MVT::v8f16, {3, 3, 3, 3}}, // psrldq + psrlldq + por
+ {TTI::SK_Splice, MVT::v16i8, {3, 3, 3, 3}}, // psrldq + psrlldq + por
+
+ {TTI::SK_PermuteSingleSrc, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
+ {TTI::SK_PermuteSingleSrc, MVT::v2i64, {1, 1, 1, 1}}, // pshufd
+ {TTI::SK_PermuteSingleSrc, MVT::v4i32, {1, 1, 1, 1}}, // pshufd
+ {TTI::SK_PermuteSingleSrc, MVT::v8i16, {5, 5, 5, 5}}, // 2*pshuflw + 2*pshufhw
+ // + pshufd/unpck
+ {TTI::SK_PermuteSingleSrc, MVT::v8f16, {5, 5, 5, 5}}, // 2*pshuflw + 2*pshufhw
+ // + pshufd/unpck
+ {TTI::SK_PermuteSingleSrc, MVT::v16i8, {10, 10, 10, 10}}, // 2*pshuflw + 2*pshufhw
+ // + 2*pshufd + 2*unpck + 2*packus
+
+ {TTI::SK_PermuteTwoSrc, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
+ {TTI::SK_PermuteTwoSrc, MVT::v2i64, {1, 1, 1, 1}}, // shufpd
+ {TTI::SK_PermuteTwoSrc, MVT::v4i32, {2, 2, 2, 2}}, // 2*{unpck,movsd,pshufd}
+ {TTI::SK_PermuteTwoSrc, MVT::v8i16, {8, 8, 8, 8}}, // blend+permute
+ {TTI::SK_PermuteTwoSrc, MVT::v8f16, {8, 8, 8, 8}}, // blend+permute
+ {TTI::SK_PermuteTwoSrc, MVT::v16i8, {13, 13, 13, 13}}, // blend+permute
};
static const CostTblEntry SSE3BroadcastLoadTbl[] = {
@@ -2233,16 +2236,17 @@ InstructionCost X86TTIImpl::getShuffleCost(
}
if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ if (auto KindCost = Entry->Cost[CostKind])
+ return LT.first * *KindCost;
}
- static const CostTblEntry SSE1ShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v4f32, 1 }, // shufps
- { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps
- { TTI::SK_Select, MVT::v4f32, 2 }, // 2*shufps
- { TTI::SK_Splice, MVT::v4f32, 2 }, // 2*shufps
- { TTI::SK_PermuteSingleSrc, MVT::v4f32, 1 }, // shufps
- { TTI::SK_PermuteTwoSrc, MVT::v4f32, 2 }, // 2*shufps
+ static const CostKindTblEntry SSE1ShuffleTbl[] = {
+ { TTI::SK_Broadcast, MVT::v4f32, {1,1,1,1} }, // shufps
+ { TTI::SK_Reverse, MVT::v4f32, {1,1,1,1} }, // shufps
+ { TTI::SK_Select, MVT::v4f32, {2,2,2,2} }, // 2*shufps
+ { TTI::SK_Splice, MVT::v4f32, {2,2,2,2} }, // 2*shufps
+ { TTI::SK_PermuteSingleSrc, MVT::v4f32, {1,1,1,1} }, // shufps
+ { TTI::SK_PermuteTwoSrc, MVT::v4f32, {2,2,2,2} }, // 2*shufps
};
if (ST->hasSSE1()) {
@@ -2255,7 +2259,8 @@ InstructionCost X86TTIImpl::getShuffleCost(
return 1;
}
if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ if (auto KindCost = Entry->Cost[CostKind])
+ return LT.first * *KindCost;
}
return BaseT::getShuffleCost(Kind, BaseTp, Mask, CostKind, Index, SubTp);
More information about the llvm-commits
mailing list