[llvm] [CostModel][X86] getShuffleCosts - convert all shuffle cost tables to be CostKind compatible. NFC. (PR #124753)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 28 06:21:52 PST 2025


https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/124753

No change in actual costs yet, but split the costs per cost kind to make it easier to tweak the numbers in future patches.

>From 79805403b23750b69616213fb81375711a06ebaa Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Tue, 28 Jan 2025 14:20:57 +0000
Subject: [PATCH] [CostModel][X86] getShuffleCosts - convert all shuffle cost
 tables to be CostKind compatible. NFC.

No change in actual costs yet, but split the costs per cost kind to make it easier to tweak the numbers in future patches.
---
 .../lib/Target/X86/X86TargetTransformInfo.cpp | 663 +++++++++---------
 1 file changed, 334 insertions(+), 329 deletions(-)

diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 22dccaf061e1fd..82523bb6557adc 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1673,40 +1673,41 @@ InstructionCost X86TTIImpl::getShuffleCost(
   EVT VT = TLI->getValueType(DL, BaseTp);
   if (VT.isSimple() && VT.isVector() && VT.getSizeInBits() < 128 &&
       !ST->hasSSSE3()) {
-     static const CostTblEntry SSE2SubVectorShuffleTbl[] = {
-      {TTI::SK_Broadcast,        MVT::v4i16, 1}, // pshuflw
-      {TTI::SK_Broadcast,        MVT::v2i16, 1}, // pshuflw
-      {TTI::SK_Broadcast,        MVT::v8i8,  2}, // punpck/pshuflw
-      {TTI::SK_Broadcast,        MVT::v4i8,  2}, // punpck/pshuflw
-      {TTI::SK_Broadcast,        MVT::v2i8,  1}, // punpck
-
-      {TTI::SK_Reverse,          MVT::v4i16, 1}, // pshuflw
-      {TTI::SK_Reverse,          MVT::v2i16, 1}, // pshuflw
-      {TTI::SK_Reverse,          MVT::v4i8,  3}, // punpck/pshuflw/packus
-      {TTI::SK_Reverse,          MVT::v2i8,  1}, // punpck
-
-      {TTI::SK_Splice,           MVT::v4i16, 2}, // punpck+psrldq
-      {TTI::SK_Splice,           MVT::v2i16, 2}, // punpck+psrldq
-      {TTI::SK_Splice,           MVT::v4i8,  2}, // punpck+psrldq
-      {TTI::SK_Splice,           MVT::v2i8,  2}, // punpck+psrldq
-
-      {TTI::SK_PermuteTwoSrc,    MVT::v4i16, 2}, // punpck/pshuflw
-      {TTI::SK_PermuteTwoSrc,    MVT::v2i16, 2}, // punpck/pshuflw
-      {TTI::SK_PermuteTwoSrc,    MVT::v8i8,  7}, // punpck/pshuflw
-      {TTI::SK_PermuteTwoSrc,    MVT::v4i8,  4}, // punpck/pshuflw
-      {TTI::SK_PermuteTwoSrc,    MVT::v2i8,  2}, // punpck
-
-      {TTI::SK_PermuteSingleSrc, MVT::v4i16, 1}, // pshuflw
-      {TTI::SK_PermuteSingleSrc, MVT::v2i16, 1}, // pshuflw
-      {TTI::SK_PermuteSingleSrc, MVT::v8i8,  5}, // punpck/pshuflw
-      {TTI::SK_PermuteSingleSrc, MVT::v4i8,  3}, // punpck/pshuflw
-      {TTI::SK_PermuteSingleSrc, MVT::v2i8,  1}, // punpck
+     static const CostKindTblEntry SSE2SubVectorShuffleTbl[] = {
+      {TTI::SK_Broadcast,        MVT::v4i16, {1,1,1,1}}, // pshuflw
+      {TTI::SK_Broadcast,        MVT::v2i16, {1,1,1,1}}, // pshuflw
+      {TTI::SK_Broadcast,        MVT::v8i8,  {2,2,2,2}}, // punpck/pshuflw
+      {TTI::SK_Broadcast,        MVT::v4i8,  {2,2,2,2}}, // punpck/pshuflw
+      {TTI::SK_Broadcast,        MVT::v2i8,  {1,1,1,1}}, // punpck
+
+      {TTI::SK_Reverse,          MVT::v4i16, {1,1,1,1}}, // pshuflw
+      {TTI::SK_Reverse,          MVT::v2i16, {1,1,1,1}}, // pshuflw
+      {TTI::SK_Reverse,          MVT::v4i8,  {3,3,3,3}}, // punpck/pshuflw/packus
+      {TTI::SK_Reverse,          MVT::v2i8,  {1,1,1,1}}, // punpck
+
+      {TTI::SK_Splice,           MVT::v4i16, {2,2,2,2}}, // punpck+psrldq
+      {TTI::SK_Splice,           MVT::v2i16, {2,2,2,2}}, // punpck+psrldq
+      {TTI::SK_Splice,           MVT::v4i8,  {2,2,2,2}}, // punpck+psrldq
+      {TTI::SK_Splice,           MVT::v2i8,  {2,2,2,2}}, // punpck+psrldq
+
+      {TTI::SK_PermuteTwoSrc,    MVT::v4i16, {2,2,2,2}}, // punpck/pshuflw
+      {TTI::SK_PermuteTwoSrc,    MVT::v2i16, {2,2,2,2}}, // punpck/pshuflw
+      {TTI::SK_PermuteTwoSrc,    MVT::v8i8,  {7,7,7,7}}, // punpck/pshuflw
+      {TTI::SK_PermuteTwoSrc,    MVT::v4i8,  {4,4,4,4}}, // punpck/pshuflw
+      {TTI::SK_PermuteTwoSrc,    MVT::v2i8,  {2,2,2,2}}, // punpck
+
+      {TTI::SK_PermuteSingleSrc, MVT::v4i16, {1,1,1,1}}, // pshuflw
+      {TTI::SK_PermuteSingleSrc, MVT::v2i16, {1,1,1,1}}, // pshuflw
+      {TTI::SK_PermuteSingleSrc, MVT::v8i8,  {5,5,5,5}}, // punpck/pshuflw
+      {TTI::SK_PermuteSingleSrc, MVT::v4i8,  {3,3,3,3}}, // punpck/pshuflw
+      {TTI::SK_PermuteSingleSrc, MVT::v2i8,  {1,1,1,1}}, // punpck
     };
 
     if (ST->hasSSE2())
       if (const auto *Entry =
               CostTableLookup(SSE2SubVectorShuffleTbl, Kind, VT.getSimpleVT()))
-        return Entry->Cost;
+        if (auto KindCost = Entry->Cost[CostKind])
+          return LT.first * *KindCost;
   }
 
   // We are going to permute multiple sources and the result will be in multiple
@@ -1803,57 +1804,57 @@ InstructionCost X86TTIImpl::getShuffleCost(
   if (LT.first == 1 && IsInLaneShuffle && IsSingleElementMask)
     return TTI::TCC_Basic;
 
-  static const CostTblEntry AVX512VBMIShuffleTbl[] = {
-      {TTI::SK_Reverse, MVT::v64i8, 1}, // vpermb
-      {TTI::SK_Reverse, MVT::v32i8, 1}, // vpermb
-
-      {TTI::SK_PermuteSingleSrc, MVT::v64i8, 1}, // vpermb
-      {TTI::SK_PermuteSingleSrc, MVT::v32i8, 1}, // vpermb
-
-      {TTI::SK_PermuteTwoSrc, MVT::v64i8, 2}, // vpermt2b
-      {TTI::SK_PermuteTwoSrc, MVT::v32i8, 2}, // vpermt2b
-      {TTI::SK_PermuteTwoSrc, MVT::v16i8, 2}  // vpermt2b
+  static const CostKindTblEntry AVX512VBMIShuffleTbl[] = {
+    { TTI::SK_Reverse, MVT::v64i8,          { 1, 1, 1, 1 } }, // vpermb
+    { TTI::SK_Reverse, MVT::v32i8,          { 1, 1, 1, 1 } }, // vpermb
+    { TTI::SK_PermuteSingleSrc, MVT::v64i8, { 1, 1, 1, 1 } }, // vpermb
+    { TTI::SK_PermuteSingleSrc, MVT::v32i8, { 1, 1, 1, 1 } }, // vpermb
+    { TTI::SK_PermuteTwoSrc, MVT::v64i8,    { 2, 2, 2, 2 } }, // vpermt2b
+    { TTI::SK_PermuteTwoSrc, MVT::v32i8,    { 2, 2, 2, 2 } }, // vpermt2b
+    { TTI::SK_PermuteTwoSrc, MVT::v16i8,    { 2, 2, 2, 2 } }  // vpermt2b
   };
 
   if (ST->hasVBMI())
     if (const auto *Entry =
             CostTableLookup(AVX512VBMIShuffleTbl, Kind, LT.second))
-      return LT.first * Entry->Cost;
-
-  static const CostTblEntry AVX512BWShuffleTbl[] = {
-      {TTI::SK_Broadcast, MVT::v32i16, 1}, // vpbroadcastw
-      {TTI::SK_Broadcast, MVT::v32f16, 1}, // vpbroadcastw
-      {TTI::SK_Broadcast, MVT::v64i8, 1},  // vpbroadcastb
-
-      {TTI::SK_Reverse, MVT::v32i16, 2}, // vpermw
-      {TTI::SK_Reverse, MVT::v32f16, 2}, // vpermw
-      {TTI::SK_Reverse, MVT::v16i16, 2}, // vpermw
-      {TTI::SK_Reverse, MVT::v64i8, 2},  // pshufb + vshufi64x2
-
-      {TTI::SK_PermuteSingleSrc, MVT::v32i16, 2}, // vpermw
-      {TTI::SK_PermuteSingleSrc, MVT::v32f16, 2}, // vpermw
-      {TTI::SK_PermuteSingleSrc, MVT::v16i16, 2}, // vpermw
-      {TTI::SK_PermuteSingleSrc, MVT::v16f16, 2}, // vpermw
-      {TTI::SK_PermuteSingleSrc, MVT::v64i8, 8},  // extend to v32i16
-
-      {TTI::SK_PermuteTwoSrc, MVT::v32i16, 2}, // vpermt2w
-      {TTI::SK_PermuteTwoSrc, MVT::v32f16, 2}, // vpermt2w
-      {TTI::SK_PermuteTwoSrc, MVT::v16i16, 2}, // vpermt2w
-      {TTI::SK_PermuteTwoSrc, MVT::v8i16, 2},  // vpermt2w
-      {TTI::SK_PermuteTwoSrc, MVT::v64i8, 19}, // 6 * v32i8 + 1
-
-      {TTI::SK_Select, MVT::v32i16, 1}, // vblendmw
-      {TTI::SK_Select, MVT::v64i8,  1}, // vblendmb
-
-      {TTI::SK_Splice, MVT::v32i16, 2}, // vshufi64x2 + palignr
-      {TTI::SK_Splice, MVT::v32f16, 2}, // vshufi64x2 + palignr
-      {TTI::SK_Splice, MVT::v64i8,  2}, // vshufi64x2 + palignr
+      if (auto KindCost = Entry->Cost[CostKind])
+        return LT.first * *KindCost;
+
+  static const CostKindTblEntry AVX512BWShuffleTbl[] = {
+    { TTI::SK_Broadcast, MVT::v32i16, { 1, 1, 1, 1 } }, // vpbroadcastw
+    { TTI::SK_Broadcast, MVT::v32f16, { 1, 1, 1, 1 } }, // vpbroadcastw
+    { TTI::SK_Broadcast, MVT::v64i8,  { 1, 1, 1, 1 } }, // vpbroadcastb
+
+    { TTI::SK_Reverse, MVT::v32i16,   { 2, 2, 2, 2 } }, // vpermw
+    { TTI::SK_Reverse, MVT::v32f16,   { 2, 2, 2, 2 } }, // vpermw
+    { TTI::SK_Reverse, MVT::v16i16,   { 2, 2, 2, 2 } }, // vpermw
+    { TTI::SK_Reverse, MVT::v64i8,    { 2, 2, 2, 2 } }, // pshufb + vshufi64x2
+
+    { TTI::SK_PermuteSingleSrc, MVT::v32i16, { 2, 2, 2, 2 } }, // vpermw
+    { TTI::SK_PermuteSingleSrc, MVT::v32f16, { 2, 2, 2, 2 } }, // vpermw
+    { TTI::SK_PermuteSingleSrc, MVT::v16i16, { 2, 2, 2, 2 } }, // vpermw
+    { TTI::SK_PermuteSingleSrc, MVT::v16f16, { 2, 2, 2, 2 } }, // vpermw
+    { TTI::SK_PermuteSingleSrc, MVT::v64i8,  { 8, 8, 8, 8 } },  // extend to v32i16
+
+    { TTI::SK_PermuteTwoSrc, MVT::v32i16,{  2,  2,  2,  2 } }, // vpermt2w
+    { TTI::SK_PermuteTwoSrc, MVT::v32f16,{  2,  2,  2,  2 } }, // vpermt2w
+    { TTI::SK_PermuteTwoSrc, MVT::v16i16,{  2,  2,  2,  2 } }, // vpermt2w
+    { TTI::SK_PermuteTwoSrc, MVT::v8i16, {  2,  2,  2,  2 } },  // vpermt2w
+    { TTI::SK_PermuteTwoSrc, MVT::v64i8, { 19, 19, 19, 19 } }, // 6 * v32i8 + 1
+
+    { TTI::SK_Select, MVT::v32i16, { 1, 1, 1, 1 } }, // vblendmw
+    { TTI::SK_Select, MVT::v64i8,  { 1, 1, 1, 1 } }, // vblendmb
+
+    { TTI::SK_Splice, MVT::v32i16, { 2, 2, 2, 2 } }, // vshufi64x2 + palignr
+    { TTI::SK_Splice, MVT::v32f16, { 2, 2, 2, 2 } }, // vshufi64x2 + palignr
+    { TTI::SK_Splice, MVT::v64i8,  { 2, 2, 2, 2 } }, // vshufi64x2 + palignr
   };
 
   if (ST->hasBWI())
     if (const auto *Entry =
             CostTableLookup(AVX512BWShuffleTbl, Kind, LT.second))
-      return LT.first * Entry->Cost;
+      if (auto KindCost = Entry->Cost[CostKind])
+        return LT.first * *KindCost;
 
   static const CostKindTblEntry AVX512ShuffleTbl[] = {
       {TTI::SK_Broadcast, MVT::v8f64,  { 1, 1, 1, 1 } }, // vbroadcastsd
@@ -1934,286 +1935,288 @@ InstructionCost X86TTIImpl::getShuffleCost(
       if (auto KindCost = Entry->Cost[CostKind])
         return LT.first * *KindCost;
 
-  static const CostTblEntry AVX2InLaneShuffleTbl[] = {
-      {TTI::SK_PermuteSingleSrc, MVT::v16i16, 1}, // vpshufb
-      {TTI::SK_PermuteSingleSrc, MVT::v16f16, 1}, // vpshufb
-      {TTI::SK_PermuteSingleSrc, MVT::v32i8,  1}, // vpshufb
-
-      {TTI::SK_PermuteTwoSrc, MVT::v4f64,     2}, // 2*vshufpd + vblendpd
-      {TTI::SK_PermuteTwoSrc, MVT::v8f32,     2}, // 2*vshufps + vblendps
-      {TTI::SK_PermuteTwoSrc, MVT::v4i64,     2}, // 2*vpshufd + vpblendd
-      {TTI::SK_PermuteTwoSrc, MVT::v8i32,     2}, // 2*vpshufd + vpblendd
-      {TTI::SK_PermuteTwoSrc, MVT::v16i16,    2}, // 2*vpshufb + vpor
-      {TTI::SK_PermuteTwoSrc, MVT::v16f16,    2}, // 2*vpshufb + vpor
-      {TTI::SK_PermuteTwoSrc, MVT::v32i8,     2}, // 2*vpshufb + vpor
+  static const CostKindTblEntry AVX2InLaneShuffleTbl[] = {
+    { TTI::SK_PermuteSingleSrc, MVT::v16i16, { 1, 1, 1, 1 } }, // vpshufb
+    { TTI::SK_PermuteSingleSrc, MVT::v16f16, { 1, 1, 1, 1 } }, // vpshufb
+    { TTI::SK_PermuteSingleSrc, MVT::v32i8,  { 1, 1, 1, 1 } }, // vpshufb
+
+    { TTI::SK_PermuteTwoSrc,    MVT::v4f64,  { 2, 2, 2, 2 } }, // 2*vshufpd + vblendpd
+    { TTI::SK_PermuteTwoSrc,    MVT::v8f32,  { 2, 2, 2, 2 } }, // 2*vshufps + vblendps
+    { TTI::SK_PermuteTwoSrc,    MVT::v4i64,  { 2, 2, 2, 2 } }, // 2*vpshufd + vpblendd
+    { TTI::SK_PermuteTwoSrc,    MVT::v8i32,  { 2, 2, 2, 2 } }, // 2*vpshufd + vpblendd
+    { TTI::SK_PermuteTwoSrc,    MVT::v16i16, { 2, 2, 2, 2 } }, // 2*vpshufb + vpor
+    { TTI::SK_PermuteTwoSrc,    MVT::v16f16, { 2, 2, 2, 2 } }, // 2*vpshufb + vpor
+    { TTI::SK_PermuteTwoSrc,    MVT::v32i8,  { 2, 2, 2, 2 } }, // 2*vpshufb + vpor
   };
 
   if (IsInLaneShuffle && ST->hasAVX2())
     if (const auto *Entry =
             CostTableLookup(AVX2InLaneShuffleTbl, Kind, LT.second))
-      return LT.first * Entry->Cost;
-
-  static const CostTblEntry AVX2ShuffleTbl[] = {
-      {TTI::SK_Broadcast, MVT::v4f64, 1},  // vbroadcastpd
-      {TTI::SK_Broadcast, MVT::v8f32, 1},  // vbroadcastps
-      {TTI::SK_Broadcast, MVT::v4i64, 1},  // vpbroadcastq
-      {TTI::SK_Broadcast, MVT::v8i32, 1},  // vpbroadcastd
-      {TTI::SK_Broadcast, MVT::v16i16, 1}, // vpbroadcastw
-      {TTI::SK_Broadcast, MVT::v16f16, 1}, // vpbroadcastw
-      {TTI::SK_Broadcast, MVT::v32i8, 1},  // vpbroadcastb
-
-      {TTI::SK_Reverse, MVT::v4f64, 1},  // vpermpd
-      {TTI::SK_Reverse, MVT::v8f32, 1},  // vpermps
-      {TTI::SK_Reverse, MVT::v4i64, 1},  // vpermq
-      {TTI::SK_Reverse, MVT::v8i32, 1},  // vpermd
-      {TTI::SK_Reverse, MVT::v16i16, 2}, // vperm2i128 + pshufb
-      {TTI::SK_Reverse, MVT::v16f16, 2}, // vperm2i128 + pshufb
-      {TTI::SK_Reverse, MVT::v32i8, 2},  // vperm2i128 + pshufb
-
-      {TTI::SK_Select, MVT::v16i16, 1}, // vpblendvb
-      {TTI::SK_Select, MVT::v16f16, 1}, // vpblendvb
-      {TTI::SK_Select, MVT::v32i8,  1}, // vpblendvb
-
-      {TTI::SK_Splice, MVT::v8i32,  2}, // vperm2i128 + vpalignr
-      {TTI::SK_Splice, MVT::v8f32,  2}, // vperm2i128 + vpalignr
-      {TTI::SK_Splice, MVT::v16i16, 2}, // vperm2i128 + vpalignr
-      {TTI::SK_Splice, MVT::v16f16, 2}, // vperm2i128 + vpalignr
-      {TTI::SK_Splice, MVT::v32i8,  2}, // vperm2i128 + vpalignr
-
-      {TTI::SK_PermuteSingleSrc, MVT::v4f64, 1},  // vpermpd
-      {TTI::SK_PermuteSingleSrc, MVT::v8f32, 1},  // vpermps
-      {TTI::SK_PermuteSingleSrc, MVT::v4i64, 1},  // vpermq
-      {TTI::SK_PermuteSingleSrc, MVT::v8i32, 1},  // vpermd
-      {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, // vperm2i128 + 2*vpshufb
-                                                  // + vpblendvb
-      {TTI::SK_PermuteSingleSrc, MVT::v16f16, 4}, // vperm2i128 + 2*vpshufb
-                                                  // + vpblendvb
-      {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4},  // vperm2i128 + 2*vpshufb
-                                                  // + vpblendvb
-
-      {TTI::SK_PermuteTwoSrc, MVT::v4f64, 3},  // 2*vpermpd + vblendpd
-      {TTI::SK_PermuteTwoSrc, MVT::v8f32, 3},  // 2*vpermps + vblendps
-      {TTI::SK_PermuteTwoSrc, MVT::v4i64, 3},  // 2*vpermq + vpblendd
-      {TTI::SK_PermuteTwoSrc, MVT::v8i32, 3},  // 2*vpermd + vpblendd
-      {TTI::SK_PermuteTwoSrc, MVT::v16i16, 7}, // 2*vperm2i128 + 4*vpshufb
-                                               // + vpblendvb
-      {TTI::SK_PermuteTwoSrc, MVT::v16f16, 7}, // 2*vperm2i128 + 4*vpshufb
-                                               // + vpblendvb
-      {TTI::SK_PermuteTwoSrc, MVT::v32i8, 7},  // 2*vperm2i128 + 4*vpshufb
-                                               // + vpblendvb
+      if (auto KindCost = Entry->Cost[CostKind])
+        return LT.first * *KindCost;
+
+  static const CostKindTblEntry AVX2ShuffleTbl[] = {
+    { TTI::SK_Broadcast, MVT::v4f64,  { 1, 1, 1, 1 } }, // vbroadcastpd
+    { TTI::SK_Broadcast, MVT::v8f32,  { 1, 1, 1, 1 } }, // vbroadcastps
+    { TTI::SK_Broadcast, MVT::v4i64,  { 1, 1, 1, 1 } }, // vpbroadcastq
+    { TTI::SK_Broadcast, MVT::v8i32,  { 1, 1, 1, 1 } }, // vpbroadcastd
+    { TTI::SK_Broadcast, MVT::v16i16, { 1, 1, 1, 1 } }, // vpbroadcastw
+    { TTI::SK_Broadcast, MVT::v16f16, { 1, 1, 1, 1 } }, // vpbroadcastw
+    { TTI::SK_Broadcast, MVT::v32i8,  { 1, 1, 1, 1 } }, // vpbroadcastb
+
+    { TTI::SK_Reverse, MVT::v4f64,    { 1, 1, 1, 1 } }, // vpermpd
+    { TTI::SK_Reverse, MVT::v8f32,    { 1, 1, 1, 1 } }, // vpermps
+    { TTI::SK_Reverse, MVT::v4i64,    { 1, 1, 1, 1 } }, // vpermq
+    { TTI::SK_Reverse, MVT::v8i32,    { 1, 1, 1, 1 } }, // vpermd
+    { TTI::SK_Reverse, MVT::v16i16,   { 2, 2, 2, 2 } }, // vperm2i128 + pshufb
+    { TTI::SK_Reverse, MVT::v16f16,   { 2, 2, 2, 2 } }, // vperm2i128 + pshufb
+    { TTI::SK_Reverse, MVT::v32i8,    { 2, 2, 2, 2 } }, // vperm2i128 + pshufb
+
+    { TTI::SK_Select, MVT::v16i16,    { 1, 1, 1, 1 } }, // vpblendvb
+    { TTI::SK_Select, MVT::v16f16,    { 1, 1, 1, 1 } }, // vpblendvb
+    { TTI::SK_Select, MVT::v32i8,     { 1, 1, 1, 1 } }, // vpblendvb
+
+    { TTI::SK_Splice, MVT::v8i32,     { 2, 2, 2, 2 } }, // vperm2i128 + vpalignr
+    { TTI::SK_Splice, MVT::v8f32,     { 2, 2, 2, 2 } }, // vperm2i128 + vpalignr
+    { TTI::SK_Splice, MVT::v16i16,    { 2, 2, 2, 2 } }, // vperm2i128 + vpalignr
+    { TTI::SK_Splice, MVT::v16f16,    { 2, 2, 2, 2 } }, // vperm2i128 + vpalignr
+    { TTI::SK_Splice, MVT::v32i8,     { 2, 2, 2, 2 } }, // vperm2i128 + vpalignr
+
+    { TTI::SK_PermuteSingleSrc, MVT::v4f64,  { 1, 1, 1, 1 } }, // vpermpd
+    { TTI::SK_PermuteSingleSrc, MVT::v8f32,  { 1, 1, 1, 1 } }, // vpermps
+    { TTI::SK_PermuteSingleSrc, MVT::v4i64,  { 1, 1, 1, 1 } }, // vpermq
+    { TTI::SK_PermuteSingleSrc, MVT::v8i32,  { 1, 1, 1, 1 } }, // vpermd
+    { TTI::SK_PermuteSingleSrc, MVT::v16i16, { 4, 4, 4, 4 } },
+    { TTI::SK_PermuteSingleSrc, MVT::v16f16, { 4, 4, 4, 4 } },
+    { TTI::SK_PermuteSingleSrc, MVT::v32i8,  { 4, 4, 4, 4 } },
+
+    { TTI::SK_PermuteTwoSrc, MVT::v4f64,  { 3, 3, 3, 3 } }, // 2*vpermpd + vblendpd
+    { TTI::SK_PermuteTwoSrc, MVT::v8f32,  { 3, 3, 3, 3 } }, // 2*vpermps + vblendps
+    { TTI::SK_PermuteTwoSrc, MVT::v4i64,  { 3, 3, 3, 3 } }, // 2*vpermq + vpblendd
+    { TTI::SK_PermuteTwoSrc, MVT::v8i32,  { 3, 3, 3, 3 } }, // 2*vpermd + vpblendd
+    { TTI::SK_PermuteTwoSrc, MVT::v16i16, { 7, 7, 7, 7 } },
+    { TTI::SK_PermuteTwoSrc, MVT::v16f16, { 7, 7, 7, 7 } },
+    { TTI::SK_PermuteTwoSrc, MVT::v32i8,  { 7, 7, 7, 7 } },
   };
 
   if (ST->hasAVX2())
     if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
-      return LT.first * Entry->Cost;
-
-  static const CostTblEntry XOPShuffleTbl[] = {
-      {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2},  // vperm2f128 + vpermil2pd
-      {TTI::SK_PermuteSingleSrc, MVT::v8f32, 2},  // vperm2f128 + vpermil2ps
-      {TTI::SK_PermuteSingleSrc, MVT::v4i64, 2},  // vperm2f128 + vpermil2pd
-      {TTI::SK_PermuteSingleSrc, MVT::v8i32, 2},  // vperm2f128 + vpermil2ps
-      {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, // vextractf128 + 2*vpperm
-                                                  // + vinsertf128
-      {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4},  // vextractf128 + 2*vpperm
-                                                  // + vinsertf128
-
-      {TTI::SK_PermuteTwoSrc, MVT::v16i16, 9}, // 2*vextractf128 + 6*vpperm
-                                               // + vinsertf128
-      {TTI::SK_PermuteTwoSrc, MVT::v8i16, 1},  // vpperm
-      {TTI::SK_PermuteTwoSrc, MVT::v32i8, 9},  // 2*vextractf128 + 6*vpperm
-                                               // + vinsertf128
-      {TTI::SK_PermuteTwoSrc, MVT::v16i8, 1},  // vpperm
+      if (auto KindCost = Entry->Cost[CostKind])
+        return LT.first * *KindCost;
+
+  static const CostKindTblEntry XOPShuffleTbl[] = {
+    { TTI::SK_PermuteSingleSrc, MVT::v4f64, { 2, 2, 2, 2 } }, // vperm2f128 + vpermil2pd
+    { TTI::SK_PermuteSingleSrc, MVT::v8f32, { 2, 2, 2, 2 } }, // vperm2f128 + vpermil2ps
+    { TTI::SK_PermuteSingleSrc, MVT::v4i64, { 2, 2, 2, 2 } }, // vperm2f128 + vpermil2pd
+    { TTI::SK_PermuteSingleSrc, MVT::v8i32, { 2, 2, 2, 2 } }, // vperm2f128 + vpermil2ps
+    { TTI::SK_PermuteSingleSrc, MVT::v16i16,{ 4, 4, 4, 4 } }, // vextractf128 + 2*vpperm
+                                                             // + vinsertf128
+    { TTI::SK_PermuteSingleSrc, MVT::v32i8, { 4, 4, 4, 4 } }, // vextractf128 + 2*vpperm
+                                                             // + vinsertf128
+
+    { TTI::SK_PermuteTwoSrc, MVT::v16i16, { 9, 9, 9, 9 } }, // 2*vextractf128 + 6*vpperm
+                                                            // + vinsertf128
+
+    { TTI::SK_PermuteTwoSrc, MVT::v8i16,  { 1, 1, 1, 1 } }, // vpperm
+    { TTI::SK_PermuteTwoSrc, MVT::v32i8,  { 9, 9, 9, 9 } }, // 2*vextractf128 + 6*vpperm
+                                                            // + vinsertf128
+    { TTI::SK_PermuteTwoSrc, MVT::v16i8,  { 1, 1, 1, 1 } }, // vpperm
   };
 
   if (ST->hasXOP())
     if (const auto *Entry = CostTableLookup(XOPShuffleTbl, Kind, LT.second))
-      return LT.first * Entry->Cost;
-
-  static const CostTblEntry AVX1InLaneShuffleTbl[] = {
-      {TTI::SK_PermuteSingleSrc, MVT::v4f64,  1},  // vpermilpd
-      {TTI::SK_PermuteSingleSrc, MVT::v4i64,  1},  // vpermilpd
-      {TTI::SK_PermuteSingleSrc, MVT::v8f32,  1},  // vpermilps
-      {TTI::SK_PermuteSingleSrc, MVT::v8i32,  1},  // vpermilps
-
-      {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, // vextractf128 + 2*pshufb
-                                                  // + vpor + vinsertf128
-      {TTI::SK_PermuteSingleSrc, MVT::v16f16, 4}, // vextractf128 + 2*pshufb
-                                                  // + vpor + vinsertf128
-      {TTI::SK_PermuteSingleSrc, MVT::v32i8,  4}, // vextractf128 + 2*pshufb
-                                                  // + vpor + vinsertf128
-
-      {TTI::SK_PermuteTwoSrc, MVT::v4f64,     2}, // 2*vshufpd + vblendpd
-      {TTI::SK_PermuteTwoSrc, MVT::v8f32,     2}, // 2*vshufps + vblendps
-      {TTI::SK_PermuteTwoSrc, MVT::v4i64,     2}, // 2*vpermilpd + vblendpd
-      {TTI::SK_PermuteTwoSrc, MVT::v8i32,     2}, // 2*vpermilps + vblendps
-      {TTI::SK_PermuteTwoSrc, MVT::v16i16,    9}, // 2*vextractf128 + 4*pshufb
-                                                  // + 2*vpor + vinsertf128
-      {TTI::SK_PermuteTwoSrc, MVT::v16f16,    9}, // 2*vextractf128 + 4*pshufb
-                                                  // + 2*vpor + vinsertf128
-      {TTI::SK_PermuteTwoSrc, MVT::v32i8,     9}, // 2*vextractf128 + 4*pshufb
-                                                  // + 2*vpor + vinsertf128
+      if (auto KindCost = Entry->Cost[CostKind])
+        return LT.first * *KindCost;
+
+  static const CostKindTblEntry AVX1InLaneShuffleTbl[] = {
+    { TTI::SK_PermuteSingleSrc, MVT::v4f64,  { 1, 1, 1, 1 } }, // vpermilpd
+    { TTI::SK_PermuteSingleSrc, MVT::v4i64,  { 1, 1, 1, 1 } }, // vpermilpd
+    { TTI::SK_PermuteSingleSrc, MVT::v8f32,  { 1, 1, 1, 1 } }, // vpermilps
+    { TTI::SK_PermuteSingleSrc, MVT::v8i32,  { 1, 1, 1, 1 } }, // vpermilps
+
+    { TTI::SK_PermuteSingleSrc, MVT::v16i16, { 4, 4, 4, 4 } }, // vextractf128 + 2*pshufb
+                                                               // + vpor + vinsertf128
+    { TTI::SK_PermuteSingleSrc, MVT::v16f16, { 4, 4, 4, 4 } }, // vextractf128 + 2*pshufb
+                                                               // + vpor + vinsertf128
+    { TTI::SK_PermuteSingleSrc, MVT::v32i8,  { 4, 4, 4, 4 } }, // vextractf128 + 2*pshufb
+                                                               // + vpor + vinsertf128
+
+    { TTI::SK_PermuteTwoSrc, MVT::v4f64,  { 2, 2, 2, 2 } }, // 2*vshufpd + vblendpd
+    { TTI::SK_PermuteTwoSrc, MVT::v8f32,  { 2, 2, 2, 2 } }, // 2*vshufps + vblendps
+    { TTI::SK_PermuteTwoSrc, MVT::v4i64,  { 2, 2, 2, 2 } }, // 2*vpermilpd + vblendpd
+    { TTI::SK_PermuteTwoSrc, MVT::v8i32,  { 2, 2, 2, 2 } }, // 2*vpermilps + vblendps
+    { TTI::SK_PermuteTwoSrc, MVT::v16i16, { 9, 9, 9, 9 } }, // 2*vextractf128 + 4*pshufb
+                                                            // + 2*vpor + vinsertf128
+    { TTI::SK_PermuteTwoSrc, MVT::v16f16, { 9, 9, 9, 9 } }, // 2*vextractf128 + 4*pshufb
+                                                            // + 2*vpor + vinsertf128
+    { TTI::SK_PermuteTwoSrc, MVT::v32i8,  { 9, 9, 9, 9 } }, // 2*vextractf128 + 4*pshufb
+                                                            // + 2*vpor + vinsertf128
   };
 
   if (IsInLaneShuffle && ST->hasAVX())
     if (const auto *Entry =
             CostTableLookup(AVX1InLaneShuffleTbl, Kind, LT.second))
-      return LT.first * Entry->Cost;
-
-  static const CostTblEntry AVX1ShuffleTbl[] = {
-      {TTI::SK_Broadcast, MVT::v4f64, 2},  // vperm2f128 + vpermilpd
-      {TTI::SK_Broadcast, MVT::v8f32, 2},  // vperm2f128 + vpermilps
-      {TTI::SK_Broadcast, MVT::v4i64, 2},  // vperm2f128 + vpermilpd
-      {TTI::SK_Broadcast, MVT::v8i32, 2},  // vperm2f128 + vpermilps
-      {TTI::SK_Broadcast, MVT::v16i16, 3}, // vpshuflw + vpshufd + vinsertf128
-      {TTI::SK_Broadcast, MVT::v16f16, 3}, // vpshuflw + vpshufd + vinsertf128
-      {TTI::SK_Broadcast, MVT::v32i8, 2},  // vpshufb + vinsertf128
-
-      {TTI::SK_Reverse, MVT::v4f64, 2},  // vperm2f128 + vpermilpd
-      {TTI::SK_Reverse, MVT::v8f32, 2},  // vperm2f128 + vpermilps
-      {TTI::SK_Reverse, MVT::v4i64, 2},  // vperm2f128 + vpermilpd
-      {TTI::SK_Reverse, MVT::v8i32, 2},  // vperm2f128 + vpermilps
-      {TTI::SK_Reverse, MVT::v16i16, 4}, // vextractf128 + 2*pshufb
-                                         // + vinsertf128
-      {TTI::SK_Reverse, MVT::v16f16, 4}, // vextractf128 + 2*pshufb
-                                         // + vinsertf128
-      {TTI::SK_Reverse, MVT::v32i8, 4},  // vextractf128 + 2*pshufb
-                                         // + vinsertf128
-
-      {TTI::SK_Select, MVT::v4i64, 1},  // vblendpd
-      {TTI::SK_Select, MVT::v4f64, 1},  // vblendpd
-      {TTI::SK_Select, MVT::v8i32, 1},  // vblendps
-      {TTI::SK_Select, MVT::v8f32, 1},  // vblendps
-      {TTI::SK_Select, MVT::v16i16, 3}, // vpand + vpandn + vpor
-      {TTI::SK_Select, MVT::v16f16, 3}, // vpand + vpandn + vpor
-      {TTI::SK_Select, MVT::v32i8, 3},  // vpand + vpandn + vpor
-
-      {TTI::SK_Splice, MVT::v4i64,  2}, // vperm2f128 + shufpd
-      {TTI::SK_Splice, MVT::v4f64,  2}, // vperm2f128 + shufpd
-      {TTI::SK_Splice, MVT::v8i32,  4}, // 2*vperm2f128 + 2*vshufps
-      {TTI::SK_Splice, MVT::v8f32,  4}, // 2*vperm2f128 + 2*vshufps
-      {TTI::SK_Splice, MVT::v16i16, 5}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
-      {TTI::SK_Splice, MVT::v16f16, 5}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
-      {TTI::SK_Splice, MVT::v32i8,  5}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
-
-      {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2},  // vperm2f128 + vshufpd
-      {TTI::SK_PermuteSingleSrc, MVT::v4i64, 2},  // vperm2f128 + vshufpd
-      {TTI::SK_PermuteSingleSrc, MVT::v8f32, 4},  // 2*vperm2f128 + 2*vshufps
-      {TTI::SK_PermuteSingleSrc, MVT::v8i32, 4},  // 2*vperm2f128 + 2*vshufps
-      {TTI::SK_PermuteSingleSrc, MVT::v16i16, 8}, // vextractf128 + 4*pshufb
-                                                  // + 2*por + vinsertf128
-      {TTI::SK_PermuteSingleSrc, MVT::v16f16, 8}, // vextractf128 + 4*pshufb
-                                                  // + 2*por + vinsertf128
-      {TTI::SK_PermuteSingleSrc, MVT::v32i8, 8},  // vextractf128 + 4*pshufb
-                                                  // + 2*por + vinsertf128
-
-      {TTI::SK_PermuteTwoSrc, MVT::v4f64, 3},   // 2*vperm2f128 + vshufpd
-      {TTI::SK_PermuteTwoSrc, MVT::v4i64, 3},   // 2*vperm2f128 + vshufpd
-      {TTI::SK_PermuteTwoSrc, MVT::v8f32, 4},   // 2*vperm2f128 + 2*vshufps
-      {TTI::SK_PermuteTwoSrc, MVT::v8i32, 4},   // 2*vperm2f128 + 2*vshufps
-      {TTI::SK_PermuteTwoSrc, MVT::v16i16, 15}, // 2*vextractf128 + 8*pshufb
-                                                // + 4*por + vinsertf128
-      {TTI::SK_PermuteTwoSrc, MVT::v16f16, 15}, // 2*vextractf128 + 8*pshufb
-                                                // + 4*por + vinsertf128
-      {TTI::SK_PermuteTwoSrc, MVT::v32i8, 15},  // 2*vextractf128 + 8*pshufb
-                                                // + 4*por + vinsertf128
+      if (auto KindCost = Entry->Cost[CostKind])
+        return LT.first * *KindCost;
+
+  static const CostKindTblEntry AVX1ShuffleTbl[] = {
+      {TTI::SK_Broadcast, MVT::v4f64,  {2,2,2,2}}, // vperm2f128 + vpermilpd
+      {TTI::SK_Broadcast, MVT::v8f32,  {2,2,2,2}}, // vperm2f128 + vpermilps
+      {TTI::SK_Broadcast, MVT::v4i64,  {2,2,2,2}}, // vperm2f128 + vpermilpd
+      {TTI::SK_Broadcast, MVT::v8i32,  {2,2,2,2}}, // vperm2f128 + vpermilps
+      {TTI::SK_Broadcast, MVT::v16i16, {3,3,3,3}}, // vpshuflw + vpshufd + vinsertf128
+      {TTI::SK_Broadcast, MVT::v16f16, {3,3,3,3}}, // vpshuflw + vpshufd + vinsertf128
+      {TTI::SK_Broadcast, MVT::v32i8,  {2,2,2,2}}, // vpshufb + vinsertf128
+
+      {TTI::SK_Reverse, MVT::v4f64,  {2,2,2,2}}, // vperm2f128 + vpermilpd
+      {TTI::SK_Reverse, MVT::v8f32,  {2,2,2,2}}, // vperm2f128 + vpermilps
+      {TTI::SK_Reverse, MVT::v4i64,  {2,2,2,2}}, // vperm2f128 + vpermilpd
+      {TTI::SK_Reverse, MVT::v8i32,  {2,2,2,2}}, // vperm2f128 + vpermilps
+      {TTI::SK_Reverse, MVT::v16i16, {4,4,4,4}}, // vextractf128 + 2*pshufb
+                                                 // + vinsertf128
+      {TTI::SK_Reverse, MVT::v16f16, {4,4,4,4}}, // vextractf128 + 2*pshufb
+                                                 // + vinsertf128
+      {TTI::SK_Reverse, MVT::v32i8,  {4,4,4,4}}, // vextractf128 + 2*pshufb
+                                                 // + vinsertf128
+
+      {TTI::SK_Select, MVT::v4i64,  {1,1,1,1}}, // vblendpd
+      {TTI::SK_Select, MVT::v4f64,  {1,1,1,1}}, // vblendpd
+      {TTI::SK_Select, MVT::v8i32,  {1,1,1,1}}, // vblendps
+      {TTI::SK_Select, MVT::v8f32,  {1,1,1,1}}, // vblendps
+      {TTI::SK_Select, MVT::v16i16, {3,3,3,3}}, // vpand + vpandn + vpor
+      {TTI::SK_Select, MVT::v16f16, {3,3,3,3}}, // vpand + vpandn + vpor
+      {TTI::SK_Select, MVT::v32i8,  {3,3,3,3}}, // vpand + vpandn + vpor
+
+      {TTI::SK_Splice, MVT::v4i64,  {2,2,2,2}}, // vperm2f128 + shufpd
+      {TTI::SK_Splice, MVT::v4f64,  {2,2,2,2}}, // vperm2f128 + shufpd
+      {TTI::SK_Splice, MVT::v8i32,  {4,4,4,4}}, // 2*vperm2f128 + 2*vshufps
+      {TTI::SK_Splice, MVT::v8f32,  {4,4,4,4}}, // 2*vperm2f128 + 2*vshufps
+      {TTI::SK_Splice, MVT::v16i16, {5,5,5,5}}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
+      {TTI::SK_Splice, MVT::v16f16, {5,5,5,5}}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
+      {TTI::SK_Splice, MVT::v32i8,  {5,5,5,5}}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
+
+      {TTI::SK_PermuteSingleSrc, MVT::v4f64, {2,2,2,2}}, // vperm2f128 + vshufpd
+      {TTI::SK_PermuteSingleSrc, MVT::v4i64, {2,2,2,2}}, // vperm2f128 + vshufpd
+      {TTI::SK_PermuteSingleSrc, MVT::v8f32, {4,4,4,4}}, // 2*vperm2f128 + 2*vshufps
+      {TTI::SK_PermuteSingleSrc, MVT::v8i32, {4,4,4,4}}, // 2*vperm2f128 + 2*vshufps
+      {TTI::SK_PermuteSingleSrc, MVT::v16i16,{8,8,8,8}}, // vextractf128 + 4*pshufb
+                                                         // + 2*por + vinsertf128
+      {TTI::SK_PermuteSingleSrc, MVT::v16f16,{8,8,8,8}}, // vextractf128 + 4*pshufb
+                                                         // + 2*por + vinsertf128
+      {TTI::SK_PermuteSingleSrc, MVT::v32i8, {8,8,8,8}}, // vextractf128 + 4*pshufb
+                                                         // + 2*por + vinsertf128
+
+      {TTI::SK_PermuteTwoSrc, MVT::v4f64, {3,3,3,3}},   // 2*vperm2f128 + vshufpd
+      {TTI::SK_PermuteTwoSrc, MVT::v4i64, {3,3,3,3}},   // 2*vperm2f128 + vshufpd
+      {TTI::SK_PermuteTwoSrc, MVT::v8f32, {4,4,4,4}},   // 2*vperm2f128 + 2*vshufps
+      {TTI::SK_PermuteTwoSrc, MVT::v8i32, {4,4,4,4}},   // 2*vperm2f128 + 2*vshufps
+      {TTI::SK_PermuteTwoSrc, MVT::v16i16,{15,15,15,15}}, // 2*vextractf128 + 8*pshufb
+                                                          // + 4*por + vinsertf128
+      {TTI::SK_PermuteTwoSrc, MVT::v16f16,{15,15,15,15}}, // 2*vextractf128 + 8*pshufb
+                                                          // + 4*por + vinsertf128
+      {TTI::SK_PermuteTwoSrc, MVT::v32i8, {15,15,15,15}}, // 2*vextractf128 + 8*pshufb
+                                                         // + 4*por + vinsertf128
   };
 
   if (ST->hasAVX())
     if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
-      return LT.first * Entry->Cost;
-
-  static const CostTblEntry SSE41ShuffleTbl[] = {
-      {TTI::SK_Select, MVT::v2i64, 1}, // pblendw
-      {TTI::SK_Select, MVT::v2f64, 1}, // movsd
-      {TTI::SK_Select, MVT::v4i32, 1}, // pblendw
-      {TTI::SK_Select, MVT::v4f32, 1}, // blendps
-      {TTI::SK_Select, MVT::v8i16, 1}, // pblendw
-      {TTI::SK_Select, MVT::v8f16, 1}, // pblendw
-      {TTI::SK_Select, MVT::v16i8, 1}  // pblendvb
+      if (auto KindCost = Entry->Cost[CostKind])
+        return LT.first * *KindCost;
+
+  static const CostKindTblEntry SSE41ShuffleTbl[] = {
+      {TTI::SK_Select, MVT::v2i64, {1,1,1,1}}, // pblendw
+      {TTI::SK_Select, MVT::v2f64, {1,1,1,1}}, // movsd
+      {TTI::SK_Select, MVT::v4i32, {1,1,1,1}}, // pblendw
+      {TTI::SK_Select, MVT::v4f32, {1,1,1,1}}, // blendps
+      {TTI::SK_Select, MVT::v8i16, {1,1,1,1}}, // pblendw
+      {TTI::SK_Select, MVT::v8f16, {1,1,1,1}}, // pblendw
+      {TTI::SK_Select, MVT::v16i8, {1,1,1,1}}  // pblendvb
   };
 
   if (ST->hasSSE41())
     if (const auto *Entry = CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
-      return LT.first * Entry->Cost;
-
-  static const CostTblEntry SSSE3ShuffleTbl[] = {
-      {TTI::SK_Broadcast, MVT::v8i16, 1}, // pshufb
-      {TTI::SK_Broadcast, MVT::v8f16, 1}, // pshufb
-      {TTI::SK_Broadcast, MVT::v16i8, 1}, // pshufb
-
-      {TTI::SK_Reverse, MVT::v8i16, 1}, // pshufb
-      {TTI::SK_Reverse, MVT::v8f16, 1}, // pshufb
-      {TTI::SK_Reverse, MVT::v16i8, 1}, // pshufb
-
-      {TTI::SK_Select, MVT::v8i16, 3}, // 2*pshufb + por
-      {TTI::SK_Select, MVT::v8f16, 3}, // 2*pshufb + por
-      {TTI::SK_Select, MVT::v16i8, 3}, // 2*pshufb + por
-
-      {TTI::SK_Splice, MVT::v4i32, 1}, // palignr
-      {TTI::SK_Splice, MVT::v4f32, 1}, // palignr
-      {TTI::SK_Splice, MVT::v8i16, 1}, // palignr
-      {TTI::SK_Splice, MVT::v8f16, 1}, // palignr
-      {TTI::SK_Splice, MVT::v16i8, 1}, // palignr
-
-      {TTI::SK_PermuteSingleSrc, MVT::v8i16, 1}, // pshufb
-      {TTI::SK_PermuteSingleSrc, MVT::v8f16, 1}, // pshufb
-      {TTI::SK_PermuteSingleSrc, MVT::v16i8, 1}, // pshufb
-
-      {TTI::SK_PermuteTwoSrc, MVT::v8i16, 3}, // 2*pshufb + por
-      {TTI::SK_PermuteTwoSrc, MVT::v8f16, 3}, // 2*pshufb + por
-      {TTI::SK_PermuteTwoSrc, MVT::v16i8, 3}, // 2*pshufb + por
+      if (auto KindCost = Entry->Cost[CostKind])
+        return LT.first * *KindCost;
+
+  static const CostKindTblEntry SSSE3ShuffleTbl[] = {
+      {TTI::SK_Broadcast, MVT::v8i16, {1, 1, 1, 1}}, // pshufb
+      {TTI::SK_Broadcast, MVT::v8f16, {1, 1, 1, 1}}, // pshufb
+      {TTI::SK_Broadcast, MVT::v16i8, {1, 1, 1, 1}}, // pshufb
+
+      {TTI::SK_Reverse, MVT::v8i16, {1, 1, 1, 1}}, // pshufb
+      {TTI::SK_Reverse, MVT::v8f16, {1, 1, 1, 1}}, // pshufb
+      {TTI::SK_Reverse, MVT::v16i8, {1, 1, 1, 1}}, // pshufb
+
+      {TTI::SK_Select, MVT::v8i16, {3, 3, 3, 3}}, // 2*pshufb + por
+      {TTI::SK_Select, MVT::v8f16, {3, 3, 3, 3}}, // 2*pshufb + por
+      {TTI::SK_Select, MVT::v16i8, {3, 3, 3, 3}}, // 2*pshufb + por
+
+      {TTI::SK_Splice, MVT::v4i32, {1, 1, 1, 1}}, // palignr
+      {TTI::SK_Splice, MVT::v4f32, {1, 1, 1, 1}}, // palignr
+      {TTI::SK_Splice, MVT::v8i16, {1, 1, 1, 1}}, // palignr
+      {TTI::SK_Splice, MVT::v8f16, {1, 1, 1, 1}}, // palignr
+      {TTI::SK_Splice, MVT::v16i8, {1, 1, 1, 1}}, // palignr
+
+      {TTI::SK_PermuteSingleSrc, MVT::v8i16, {1, 1, 1, 1}}, // pshufb
+      {TTI::SK_PermuteSingleSrc, MVT::v8f16, {1, 1, 1, 1}}, // pshufb
+      {TTI::SK_PermuteSingleSrc, MVT::v16i8, {1, 1, 1, 1}}, // pshufb
+
+      {TTI::SK_PermuteTwoSrc, MVT::v8i16, {3, 3, 3, 3}}, // 2*pshufb + por
+      {TTI::SK_PermuteTwoSrc, MVT::v8f16, {3, 3, 3, 3}}, // 2*pshufb + por
+      {TTI::SK_PermuteTwoSrc, MVT::v16i8, {3, 3, 3, 3}}, // 2*pshufb + por
   };
 
   if (ST->hasSSSE3())
     if (const auto *Entry = CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
-      return LT.first * Entry->Cost;
-
-  static const CostTblEntry SSE2ShuffleTbl[] = {
-      {TTI::SK_Broadcast, MVT::v2f64, 1}, // shufpd
-      {TTI::SK_Broadcast, MVT::v2i64, 1}, // pshufd
-      {TTI::SK_Broadcast, MVT::v4i32, 1}, // pshufd
-      {TTI::SK_Broadcast, MVT::v8i16, 2}, // pshuflw + pshufd
-      {TTI::SK_Broadcast, MVT::v8f16, 2}, // pshuflw + pshufd
-      {TTI::SK_Broadcast, MVT::v16i8, 3}, // unpck + pshuflw + pshufd
-
-      {TTI::SK_Reverse, MVT::v2f64, 1}, // shufpd
-      {TTI::SK_Reverse, MVT::v2i64, 1}, // pshufd
-      {TTI::SK_Reverse, MVT::v4i32, 1}, // pshufd
-      {TTI::SK_Reverse, MVT::v8i16, 3}, // pshuflw + pshufhw + pshufd
-      {TTI::SK_Reverse, MVT::v8f16, 3}, // pshuflw + pshufhw + pshufd
-      {TTI::SK_Reverse, MVT::v16i8, 9}, // 2*pshuflw + 2*pshufhw
-                                        // + 2*pshufd + 2*unpck + packus
-
-      {TTI::SK_Select, MVT::v2i64, 1}, // movsd
-      {TTI::SK_Select, MVT::v2f64, 1}, // movsd
-      {TTI::SK_Select, MVT::v4i32, 2}, // 2*shufps
-      {TTI::SK_Select, MVT::v8i16, 3}, // pand + pandn + por
-      {TTI::SK_Select, MVT::v8f16, 3}, // pand + pandn + por
-      {TTI::SK_Select, MVT::v16i8, 3}, // pand + pandn + por
-
-      {TTI::SK_Splice, MVT::v2i64, 1}, // shufpd
-      {TTI::SK_Splice, MVT::v2f64, 1}, // shufpd
-      {TTI::SK_Splice, MVT::v4i32, 2}, // 2*{unpck,movsd,pshufd}
-      {TTI::SK_Splice, MVT::v8i16, 3}, // psrldq + psrlldq + por
-      {TTI::SK_Splice, MVT::v8f16, 3}, // psrldq + psrlldq + por
-      {TTI::SK_Splice, MVT::v16i8, 3}, // psrldq + psrlldq + por
-
-      {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1}, // shufpd
-      {TTI::SK_PermuteSingleSrc, MVT::v2i64, 1}, // pshufd
-      {TTI::SK_PermuteSingleSrc, MVT::v4i32, 1}, // pshufd
-      {TTI::SK_PermuteSingleSrc, MVT::v8i16, 5}, // 2*pshuflw + 2*pshufhw
-                                                  // + pshufd/unpck
-      {TTI::SK_PermuteSingleSrc, MVT::v8f16, 5}, // 2*pshuflw + 2*pshufhw
-                                                  // + pshufd/unpck
-    { TTI::SK_PermuteSingleSrc, MVT::v16i8, 10 }, // 2*pshuflw + 2*pshufhw
-                                                  // + 2*pshufd + 2*unpck + 2*packus
-
-    { TTI::SK_PermuteTwoSrc,    MVT::v2f64,  1 }, // shufpd
-    { TTI::SK_PermuteTwoSrc,    MVT::v2i64,  1 }, // shufpd
-    { TTI::SK_PermuteTwoSrc,    MVT::v4i32,  2 }, // 2*{unpck,movsd,pshufd}
-    { TTI::SK_PermuteTwoSrc,    MVT::v8i16,  8 }, // blend+permute
-    { TTI::SK_PermuteTwoSrc,    MVT::v8f16,  8 }, // blend+permute
-    { TTI::SK_PermuteTwoSrc,    MVT::v16i8, 13 }, // blend+permute
+      if (auto KindCost = Entry->Cost[CostKind])
+        return LT.first * *KindCost;
+
+  static const CostKindTblEntry SSE2ShuffleTbl[] = {
+      {TTI::SK_Broadcast, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
+      {TTI::SK_Broadcast, MVT::v2i64, {1, 1, 1, 1}}, // pshufd
+      {TTI::SK_Broadcast, MVT::v4i32, {1, 1, 1, 1}}, // pshufd
+      {TTI::SK_Broadcast, MVT::v8i16, {2, 2, 2, 2}}, // pshuflw + pshufd
+      {TTI::SK_Broadcast, MVT::v8f16, {2, 2, 2, 2}}, // pshuflw + pshufd
+      {TTI::SK_Broadcast, MVT::v16i8, {3, 3, 3, 3}}, // unpck + pshuflw + pshufd
+
+      {TTI::SK_Reverse, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
+      {TTI::SK_Reverse, MVT::v2i64, {1, 1, 1, 1}}, // pshufd
+      {TTI::SK_Reverse, MVT::v4i32, {1, 1, 1, 1}}, // pshufd
+      {TTI::SK_Reverse, MVT::v8i16, {3, 3, 3, 3}}, // pshuflw + pshufhw + pshufd
+      {TTI::SK_Reverse, MVT::v8f16, {3, 3, 3, 3}}, // pshuflw + pshufhw + pshufd
+      {TTI::SK_Reverse, MVT::v16i8, {9, 9, 9, 9}}, // 2*pshuflw + 2*pshufhw
+                                                   // + 2*pshufd + 2*unpck + packus
+
+      {TTI::SK_Select, MVT::v2i64, {1, 1, 1, 1}}, // movsd
+      {TTI::SK_Select, MVT::v2f64, {1, 1, 1, 1}}, // movsd
+      {TTI::SK_Select, MVT::v4i32, {2, 2, 2, 2}}, // 2*shufps
+      {TTI::SK_Select, MVT::v8i16, {3, 3, 3, 3}}, // pand + pandn + por
+      {TTI::SK_Select, MVT::v8f16, {3, 3, 3, 3}}, // pand + pandn + por
+      {TTI::SK_Select, MVT::v16i8, {3, 3, 3, 3}}, // pand + pandn + por
+
+      {TTI::SK_Splice, MVT::v2i64, {1, 1, 1, 1}}, // shufpd
+      {TTI::SK_Splice, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
+      {TTI::SK_Splice, MVT::v4i32, {2, 2, 2, 2}}, // 2*{unpck,movsd,pshufd}
+      {TTI::SK_Splice, MVT::v8i16, {3, 3, 3, 3}}, // psrldq + psrlldq + por
+      {TTI::SK_Splice, MVT::v8f16, {3, 3, 3, 3}}, // psrldq + psrlldq + por
+      {TTI::SK_Splice, MVT::v16i8, {3, 3, 3, 3}}, // psrldq + psrlldq + por
+
+      {TTI::SK_PermuteSingleSrc, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
+      {TTI::SK_PermuteSingleSrc, MVT::v2i64, {1, 1, 1, 1}}, // pshufd
+      {TTI::SK_PermuteSingleSrc, MVT::v4i32, {1, 1, 1, 1}}, // pshufd
+      {TTI::SK_PermuteSingleSrc, MVT::v8i16, {5, 5, 5, 5}}, // 2*pshuflw + 2*pshufhw
+                                                            // + pshufd/unpck
+      {TTI::SK_PermuteSingleSrc, MVT::v8f16, {5, 5, 5, 5}}, // 2*pshuflw + 2*pshufhw
+                                                            // + pshufd/unpck
+      {TTI::SK_PermuteSingleSrc, MVT::v16i8, {10, 10, 10, 10}}, // 2*pshuflw + 2*pshufhw
+                                                                // + 2*pshufd + 2*unpck + 2*packus
+
+      {TTI::SK_PermuteTwoSrc, MVT::v2f64, {1, 1, 1, 1}},     // shufpd
+      {TTI::SK_PermuteTwoSrc, MVT::v2i64, {1, 1, 1, 1}},     // shufpd
+      {TTI::SK_PermuteTwoSrc, MVT::v4i32, {2, 2, 2, 2}},     // 2*{unpck,movsd,pshufd}
+      {TTI::SK_PermuteTwoSrc, MVT::v8i16, {8, 8, 8, 8}},     // blend+permute
+      {TTI::SK_PermuteTwoSrc, MVT::v8f16, {8, 8, 8, 8}},     // blend+permute
+      {TTI::SK_PermuteTwoSrc, MVT::v16i8, {13, 13, 13, 13}}, // blend+permute
   };
 
   static const CostTblEntry SSE3BroadcastLoadTbl[] = {
@@ -2233,16 +2236,17 @@ InstructionCost X86TTIImpl::getShuffleCost(
       }
 
     if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
-      return LT.first * Entry->Cost;
+      if (auto KindCost = Entry->Cost[CostKind])
+        return LT.first * *KindCost;
   }
 
-  static const CostTblEntry SSE1ShuffleTbl[] = {
-    { TTI::SK_Broadcast,        MVT::v4f32, 1 }, // shufps
-    { TTI::SK_Reverse,          MVT::v4f32, 1 }, // shufps
-    { TTI::SK_Select,           MVT::v4f32, 2 }, // 2*shufps
-    { TTI::SK_Splice,           MVT::v4f32, 2 }, // 2*shufps
-    { TTI::SK_PermuteSingleSrc, MVT::v4f32, 1 }, // shufps
-    { TTI::SK_PermuteTwoSrc,    MVT::v4f32, 2 }, // 2*shufps
+  static const CostKindTblEntry SSE1ShuffleTbl[] = {
+    { TTI::SK_Broadcast,        MVT::v4f32, {1,1,1,1} }, // shufps
+    { TTI::SK_Reverse,          MVT::v4f32, {1,1,1,1} }, // shufps
+    { TTI::SK_Select,           MVT::v4f32, {2,2,2,2} }, // 2*shufps
+    { TTI::SK_Splice,           MVT::v4f32, {2,2,2,2} }, // 2*shufps
+    { TTI::SK_PermuteSingleSrc, MVT::v4f32, {1,1,1,1} }, // shufps
+    { TTI::SK_PermuteTwoSrc,    MVT::v4f32, {2,2,2,2} }, // 2*shufps
   };
 
   if (ST->hasSSE1()) {
@@ -2255,7 +2259,8 @@ InstructionCost X86TTIImpl::getShuffleCost(
         return 1;
     }
     if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
-      return LT.first * Entry->Cost;
+      if (auto KindCost = Entry->Cost[CostKind])
+        return LT.first * *KindCost;
   }
 
   return BaseT::getShuffleCost(Kind, BaseTp, Mask, CostKind, Index, SubTp);



More information about the llvm-commits mailing list