[llvm] r290956 - [X86] Merged Reverse/Alternate shuffle cost tables. NFCI.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 4 04:08:42 PST 2017
Author: rksimon
Date: Wed Jan 4 06:08:41 2017
New Revision: 290956
URL: http://llvm.org/viewvc/llvm-project?rev=290956&view=rev
Log:
[X86] Merged Reverse/Alternate shuffle cost tables. NFCI.
As discussed on D27811, merged the shuffle cost LUTs and use the shuffle kind to perform the lookup instead of the ISD opcode.
Modified:
llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=290956&r1=290955&r2=290956&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Wed Jan 4 06:08:41 2017
@@ -598,198 +598,138 @@ int X86TTIImpl::getArithmeticInstrCost(
int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
-
- if (Kind == TTI::SK_Reverse) {
+ if (Kind == TTI::SK_Reverse || Kind == TTI::SK_Alternate) {
+ // 64-bit packed float vectors (v2f32) are widened to type v4f32.
+ // 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
static const CostTblEntry AVX512VBMIShuffleTbl[] = {
- { ISD::VECTOR_SHUFFLE, MVT::v64i8, 1 }, // vpermb
- { ISD::VECTOR_SHUFFLE, MVT::v32i8, 1 } // vpermb
+ { TTI::SK_Reverse, MVT::v64i8, 1 }, // vpermb
+ { TTI::SK_Reverse, MVT::v32i8, 1 } // vpermb
};
if (ST->hasVBMI())
- if (const auto *Entry = CostTableLookup(AVX512VBMIShuffleTbl,
- ISD::VECTOR_SHUFFLE, LT.second))
+ if (const auto *Entry =
+ CostTableLookup(AVX512VBMIShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVX512BWShuffleTbl[] = {
- { ISD::VECTOR_SHUFFLE, MVT::v32i16, 1 }, // vpermw
- { ISD::VECTOR_SHUFFLE, MVT::v16i16, 1 }, // vpermw
- { ISD::VECTOR_SHUFFLE, MVT::v64i8, 6 } // vextracti64x4 + 2*vperm2i128
- // + 2*pshufb + vinserti64x4
+ { TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw
+ { TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw
+ { TTI::SK_Reverse, MVT::v64i8, 6 } // vextracti64x4 + 2*vperm2i128
+ // + 2*pshufb + vinserti64x4
};
if (ST->hasBWI())
- if (const auto *Entry = CostTableLookup(AVX512BWShuffleTbl,
- ISD::VECTOR_SHUFFLE, LT.second))
+ if (const auto *Entry =
+ CostTableLookup(AVX512BWShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVX512ShuffleTbl[] = {
- { ISD::VECTOR_SHUFFLE, MVT::v8f64, 1 }, // vpermpd
- { ISD::VECTOR_SHUFFLE, MVT::v16f32, 1 }, // vpermps
- { ISD::VECTOR_SHUFFLE, MVT::v8i64, 1 }, // vpermq
- { ISD::VECTOR_SHUFFLE, MVT::v16i32, 1 }, // vpermd
+ { TTI::SK_Reverse, MVT::v8f64, 1 }, // vpermpd
+ { TTI::SK_Reverse, MVT::v16f32, 1 }, // vpermps
+ { TTI::SK_Reverse, MVT::v8i64, 1 }, // vpermq
+ { TTI::SK_Reverse, MVT::v16i32, 1 }, // vpermd
};
if (ST->hasAVX512())
if (const auto *Entry =
- CostTableLookup(AVX512ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+ CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVX2ShuffleTbl[] = {
- { ISD::VECTOR_SHUFFLE, MVT::v4f64, 1 }, // vpermpd
- { ISD::VECTOR_SHUFFLE, MVT::v8f32, 1 }, // vpermps
- { ISD::VECTOR_SHUFFLE, MVT::v4i64, 1 }, // vpermq
- { ISD::VECTOR_SHUFFLE, MVT::v8i32, 1 }, // vpermd
- { ISD::VECTOR_SHUFFLE, MVT::v16i16, 2 }, // vperm2i128 + pshufb
- { ISD::VECTOR_SHUFFLE, MVT::v32i8, 2 } // vperm2i128 + pshufb
+ { TTI::SK_Reverse, MVT::v4f64, 1 }, // vpermpd
+ { TTI::SK_Reverse, MVT::v8f32, 1 }, // vpermps
+ { TTI::SK_Reverse, MVT::v4i64, 1 }, // vpermq
+ { TTI::SK_Reverse, MVT::v8i32, 1 }, // vpermd
+ { TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb
+ { TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb
+
+ { TTI::SK_Alternate, MVT::v16i16, 1 } // vpblendw
};
if (ST->hasAVX2())
- if (const auto *Entry =
- CostTableLookup(AVX2ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+ if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVX1ShuffleTbl[] = {
- { ISD::VECTOR_SHUFFLE, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
- { ISD::VECTOR_SHUFFLE, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
- { ISD::VECTOR_SHUFFLE, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
- { ISD::VECTOR_SHUFFLE, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
- { ISD::VECTOR_SHUFFLE, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb
- // + vinsertf128
- { ISD::VECTOR_SHUFFLE, MVT::v32i8, 4 } // vextractf128 + 2*pshufb
- // + vinsertf128
+ { TTI::SK_Reverse, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
+ { TTI::SK_Reverse, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
+ { TTI::SK_Reverse, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
+ { TTI::SK_Reverse, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
+ { TTI::SK_Reverse, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb
+ // + vinsertf128
+ { TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb
+ // + vinsertf128
+
+ { TTI::SK_Alternate, MVT::v4i64, 1 }, // vblendpd
+ { TTI::SK_Alternate, MVT::v4f64, 1 }, // vblendpd
+ { TTI::SK_Alternate, MVT::v8i32, 1 }, // vblendps
+ { TTI::SK_Alternate, MVT::v8f32, 1 }, // vblendps
+
+ { TTI::SK_Alternate, MVT::v16i16, 5 }, // 2*vextractf128 + 2*vpblendw
+ // + vinsertf128
+ { TTI::SK_Alternate, MVT::v32i8, 9 } // 2*vextractf128 + 4*vpshufb
+ // + 2*vpor + vinsertf128
};
if (ST->hasAVX())
- if (const auto *Entry =
- CostTableLookup(AVX1ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+ if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
+
+ static const CostTblEntry SSE41ShuffleTbl[] = {
+ { TTI::SK_Alternate, MVT::v2i64, 1 }, // pblendw
+ { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
+ { TTI::SK_Alternate, MVT::v4i32, 1 }, // pblendw
+ { TTI::SK_Alternate, MVT::v4f32, 1 }, // blendps
+ { TTI::SK_Alternate, MVT::v8i16, 1 }, // pblendw
+ { TTI::SK_Alternate, MVT::v16i8, 3 } // 2*pshufb + por
+ };
+
+ if (ST->hasSSE41())
+ if (const auto *Entry = CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry SSSE3ShuffleTbl[] = {
- { ISD::VECTOR_SHUFFLE, MVT::v8i16, 1 }, // pshufb
- { ISD::VECTOR_SHUFFLE, MVT::v16i8, 1 } // pshufb
+ { TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb
+ { TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb
+
+ { TTI::SK_Alternate, MVT::v8i16, 3 }, // pshufb + pshufb + por
+ { TTI::SK_Alternate, MVT::v16i8, 3 } // pshufb + pshufb + por
};
if (ST->hasSSSE3())
- if (const auto *Entry =
- CostTableLookup(SSSE3ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+ if (const auto *Entry = CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry SSE2ShuffleTbl[] = {
- { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 }, // shufpd
- { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 }, // pshufd
- { ISD::VECTOR_SHUFFLE, MVT::v4i32, 1 }, // pshufd
- { ISD::VECTOR_SHUFFLE, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd
- { ISD::VECTOR_SHUFFLE, MVT::v16i8, 9 } // 2*pshuflw + 2*pshufhw
- // + 2*pshufd + 2*unpck + packus
+ { TTI::SK_Reverse, MVT::v2f64, 1 }, // shufpd
+ { TTI::SK_Reverse, MVT::v2i64, 1 }, // pshufd
+ { TTI::SK_Reverse, MVT::v4i32, 1 }, // pshufd
+ { TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd
+ { TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw
+ // + 2*pshufd + 2*unpck + packus
+
+ { TTI::SK_Alternate, MVT::v2i64, 1 }, // movsd
+ { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
+ { TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps
+ { TTI::SK_Alternate, MVT::v8i16, 8 }, // 4*pextrw + 4*pinsrw.
+ { TTI::SK_Alternate, MVT::v16i8, 48 }, // 8*(pinsrw + pextrw + and +movb + movzb + or)
};
if (ST->hasSSE2())
- if (const auto *Entry =
- CostTableLookup(SSE2ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+ if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry SSE1ShuffleTbl[] = {
- { ISD::VECTOR_SHUFFLE, MVT::v4f32, 1 }, // shufps
+ { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps
+ { TTI::SK_Alternate, MVT::v4f32, 2 } // 2*shufps
};
if (ST->hasSSE1())
- if (const auto *Entry =
- CostTableLookup(SSE1ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
- return LT.first * Entry->Cost;
-
- } else if (Kind == TTI::SK_Alternate) {
- // 64-bit packed float vectors (v2f32) are widened to type v4f32.
- // 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
-
- // The backend knows how to generate a single VEX.256 version of
- // instruction VPBLENDW if the target supports AVX2.
- if (ST->hasAVX2() && LT.second == MVT::v16i16)
- return LT.first;
-
- static const CostTblEntry AVXAltShuffleTbl[] = {
- {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vblendpd
- {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vblendpd
-
- {ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vblendps
- {ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vblendps
-
- // This shuffle is custom lowered into a sequence of:
- // 2x vextractf128 , 2x vpblendw , 1x vinsertf128
- {ISD::VECTOR_SHUFFLE, MVT::v16i16, 5},
-
- // This shuffle is custom lowered into a long sequence of:
- // 2x vextractf128 , 4x vpshufb , 2x vpor , 1x vinsertf128
- {ISD::VECTOR_SHUFFLE, MVT::v32i8, 9}
- };
-
- if (ST->hasAVX())
- if (const auto *Entry = CostTableLookup(AVXAltShuffleTbl,
- ISD::VECTOR_SHUFFLE, LT.second))
- return LT.first * Entry->Cost;
-
- static const CostTblEntry SSE41AltShuffleTbl[] = {
- // These are lowered into movsd.
- {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
- {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
-
- // packed float vectors with four elements are lowered into BLENDI dag
- // nodes. A v4i32/v4f32 BLENDI generates a single 'blendps'/'blendpd'.
- {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
- {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
-
- // This shuffle generates a single pshufw.
- {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
-
- // There is no instruction that matches a v16i8 alternate shuffle.
- // The backend will expand it into the sequence 'pshufb + pshufb + or'.
- {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3}
- };
-
- if (ST->hasSSE41())
- if (const auto *Entry = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE,
- LT.second))
- return LT.first * Entry->Cost;
-
- static const CostTblEntry SSSE3AltShuffleTbl[] = {
- {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
- {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
-
- // SSE3 doesn't have 'blendps'. The following shuffles are expanded into
- // the sequence 'shufps + pshufd'
- {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
- {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
-
- {ISD::VECTOR_SHUFFLE, MVT::v8i16, 3}, // pshufb + pshufb + or
- {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // pshufb + pshufb + or
- };
-
- if (ST->hasSSSE3())
- if (const auto *Entry = CostTableLookup(SSSE3AltShuffleTbl,
- ISD::VECTOR_SHUFFLE, LT.second))
+ if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
- static const CostTblEntry SSEAltShuffleTbl[] = {
- {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
- {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
-
- {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, // shufps + pshufd
- {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, // shufps + pshufd
-
- // This is expanded into a long sequence of four extract + four insert.
- {ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, // 4 x pextrw + 4 pinsrw.
-
- // 8 x (pinsrw + pextrw + and + movb + movzb + or)
- {ISD::VECTOR_SHUFFLE, MVT::v16i8, 48}
- };
-
- // Fall-back (SSE3 and SSE2).
- if (const auto *Entry = CostTableLookup(SSEAltShuffleTbl,
- ISD::VECTOR_SHUFFLE, LT.second))
- return LT.first * Entry->Cost;
-
} else if (Kind == TTI::SK_PermuteTwoSrc) {
// We assume that source and destination have the same vector type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
More information about the llvm-commits
mailing list