[llvm] [CostModel][X86] getShuffleCosts - convert all shuffle cost tables to be CostKind compatible. NFC. (PR #124753)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 28 06:25:13 PST 2025


github-actions[bot] wrote:

<!--LLVM CODE FORMAT COMMENT: {clang-format}-->


:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
git-clang-format --diff 8fe7860610e3ff699831e11e4d57e38a198c40e8 79805403b23750b69616213fb81375711a06ebaa --extensions cpp -- llvm/lib/Target/X86/X86TargetTransformInfo.cpp
``````````

</details>

<details>
<summary>
View the diff from clang-format here.
</summary>

``````````diff
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 82523bb655..7015965fcb 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1673,34 +1673,34 @@ InstructionCost X86TTIImpl::getShuffleCost(
   EVT VT = TLI->getValueType(DL, BaseTp);
   if (VT.isSimple() && VT.isVector() && VT.getSizeInBits() < 128 &&
       !ST->hasSSSE3()) {
-     static const CostKindTblEntry SSE2SubVectorShuffleTbl[] = {
-      {TTI::SK_Broadcast,        MVT::v4i16, {1,1,1,1}}, // pshuflw
-      {TTI::SK_Broadcast,        MVT::v2i16, {1,1,1,1}}, // pshuflw
-      {TTI::SK_Broadcast,        MVT::v8i8,  {2,2,2,2}}, // punpck/pshuflw
-      {TTI::SK_Broadcast,        MVT::v4i8,  {2,2,2,2}}, // punpck/pshuflw
-      {TTI::SK_Broadcast,        MVT::v2i8,  {1,1,1,1}}, // punpck
-
-      {TTI::SK_Reverse,          MVT::v4i16, {1,1,1,1}}, // pshuflw
-      {TTI::SK_Reverse,          MVT::v2i16, {1,1,1,1}}, // pshuflw
-      {TTI::SK_Reverse,          MVT::v4i8,  {3,3,3,3}}, // punpck/pshuflw/packus
-      {TTI::SK_Reverse,          MVT::v2i8,  {1,1,1,1}}, // punpck
-
-      {TTI::SK_Splice,           MVT::v4i16, {2,2,2,2}}, // punpck+psrldq
-      {TTI::SK_Splice,           MVT::v2i16, {2,2,2,2}}, // punpck+psrldq
-      {TTI::SK_Splice,           MVT::v4i8,  {2,2,2,2}}, // punpck+psrldq
-      {TTI::SK_Splice,           MVT::v2i8,  {2,2,2,2}}, // punpck+psrldq
-
-      {TTI::SK_PermuteTwoSrc,    MVT::v4i16, {2,2,2,2}}, // punpck/pshuflw
-      {TTI::SK_PermuteTwoSrc,    MVT::v2i16, {2,2,2,2}}, // punpck/pshuflw
-      {TTI::SK_PermuteTwoSrc,    MVT::v8i8,  {7,7,7,7}}, // punpck/pshuflw
-      {TTI::SK_PermuteTwoSrc,    MVT::v4i8,  {4,4,4,4}}, // punpck/pshuflw
-      {TTI::SK_PermuteTwoSrc,    MVT::v2i8,  {2,2,2,2}}, // punpck
-
-      {TTI::SK_PermuteSingleSrc, MVT::v4i16, {1,1,1,1}}, // pshuflw
-      {TTI::SK_PermuteSingleSrc, MVT::v2i16, {1,1,1,1}}, // pshuflw
-      {TTI::SK_PermuteSingleSrc, MVT::v8i8,  {5,5,5,5}}, // punpck/pshuflw
-      {TTI::SK_PermuteSingleSrc, MVT::v4i8,  {3,3,3,3}}, // punpck/pshuflw
-      {TTI::SK_PermuteSingleSrc, MVT::v2i8,  {1,1,1,1}}, // punpck
+    static const CostKindTblEntry SSE2SubVectorShuffleTbl[] = {
+        {TTI::SK_Broadcast, MVT::v4i16, {1, 1, 1, 1}}, // pshuflw
+        {TTI::SK_Broadcast, MVT::v2i16, {1, 1, 1, 1}}, // pshuflw
+        {TTI::SK_Broadcast, MVT::v8i8, {2, 2, 2, 2}},  // punpck/pshuflw
+        {TTI::SK_Broadcast, MVT::v4i8, {2, 2, 2, 2}},  // punpck/pshuflw
+        {TTI::SK_Broadcast, MVT::v2i8, {1, 1, 1, 1}},  // punpck
+
+        {TTI::SK_Reverse, MVT::v4i16, {1, 1, 1, 1}}, // pshuflw
+        {TTI::SK_Reverse, MVT::v2i16, {1, 1, 1, 1}}, // pshuflw
+        {TTI::SK_Reverse, MVT::v4i8, {3, 3, 3, 3}},  // punpck/pshuflw/packus
+        {TTI::SK_Reverse, MVT::v2i8, {1, 1, 1, 1}},  // punpck
+
+        {TTI::SK_Splice, MVT::v4i16, {2, 2, 2, 2}}, // punpck+psrldq
+        {TTI::SK_Splice, MVT::v2i16, {2, 2, 2, 2}}, // punpck+psrldq
+        {TTI::SK_Splice, MVT::v4i8, {2, 2, 2, 2}},  // punpck+psrldq
+        {TTI::SK_Splice, MVT::v2i8, {2, 2, 2, 2}},  // punpck+psrldq
+
+        {TTI::SK_PermuteTwoSrc, MVT::v4i16, {2, 2, 2, 2}}, // punpck/pshuflw
+        {TTI::SK_PermuteTwoSrc, MVT::v2i16, {2, 2, 2, 2}}, // punpck/pshuflw
+        {TTI::SK_PermuteTwoSrc, MVT::v8i8, {7, 7, 7, 7}},  // punpck/pshuflw
+        {TTI::SK_PermuteTwoSrc, MVT::v4i8, {4, 4, 4, 4}},  // punpck/pshuflw
+        {TTI::SK_PermuteTwoSrc, MVT::v2i8, {2, 2, 2, 2}},  // punpck
+
+        {TTI::SK_PermuteSingleSrc, MVT::v4i16, {1, 1, 1, 1}}, // pshuflw
+        {TTI::SK_PermuteSingleSrc, MVT::v2i16, {1, 1, 1, 1}}, // pshuflw
+        {TTI::SK_PermuteSingleSrc, MVT::v8i8, {5, 5, 5, 5}},  // punpck/pshuflw
+        {TTI::SK_PermuteSingleSrc, MVT::v4i8, {3, 3, 3, 3}},  // punpck/pshuflw
+        {TTI::SK_PermuteSingleSrc, MVT::v2i8, {1, 1, 1, 1}},  // punpck
     };
 
     if (ST->hasSSE2())
@@ -1805,13 +1805,13 @@ InstructionCost X86TTIImpl::getShuffleCost(
     return TTI::TCC_Basic;
 
   static const CostKindTblEntry AVX512VBMIShuffleTbl[] = {
-    { TTI::SK_Reverse, MVT::v64i8,          { 1, 1, 1, 1 } }, // vpermb
-    { TTI::SK_Reverse, MVT::v32i8,          { 1, 1, 1, 1 } }, // vpermb
-    { TTI::SK_PermuteSingleSrc, MVT::v64i8, { 1, 1, 1, 1 } }, // vpermb
-    { TTI::SK_PermuteSingleSrc, MVT::v32i8, { 1, 1, 1, 1 } }, // vpermb
-    { TTI::SK_PermuteTwoSrc, MVT::v64i8,    { 2, 2, 2, 2 } }, // vpermt2b
-    { TTI::SK_PermuteTwoSrc, MVT::v32i8,    { 2, 2, 2, 2 } }, // vpermt2b
-    { TTI::SK_PermuteTwoSrc, MVT::v16i8,    { 2, 2, 2, 2 } }  // vpermt2b
+      {TTI::SK_Reverse, MVT::v64i8, {1, 1, 1, 1}},          // vpermb
+      {TTI::SK_Reverse, MVT::v32i8, {1, 1, 1, 1}},          // vpermb
+      {TTI::SK_PermuteSingleSrc, MVT::v64i8, {1, 1, 1, 1}}, // vpermb
+      {TTI::SK_PermuteSingleSrc, MVT::v32i8, {1, 1, 1, 1}}, // vpermb
+      {TTI::SK_PermuteTwoSrc, MVT::v64i8, {2, 2, 2, 2}},    // vpermt2b
+      {TTI::SK_PermuteTwoSrc, MVT::v32i8, {2, 2, 2, 2}},    // vpermt2b
+      {TTI::SK_PermuteTwoSrc, MVT::v16i8, {2, 2, 2, 2}}     // vpermt2b
   };
 
   if (ST->hasVBMI())
@@ -1821,33 +1821,33 @@ InstructionCost X86TTIImpl::getShuffleCost(
         return LT.first * *KindCost;
 
   static const CostKindTblEntry AVX512BWShuffleTbl[] = {
-    { TTI::SK_Broadcast, MVT::v32i16, { 1, 1, 1, 1 } }, // vpbroadcastw
-    { TTI::SK_Broadcast, MVT::v32f16, { 1, 1, 1, 1 } }, // vpbroadcastw
-    { TTI::SK_Broadcast, MVT::v64i8,  { 1, 1, 1, 1 } }, // vpbroadcastb
-
-    { TTI::SK_Reverse, MVT::v32i16,   { 2, 2, 2, 2 } }, // vpermw
-    { TTI::SK_Reverse, MVT::v32f16,   { 2, 2, 2, 2 } }, // vpermw
-    { TTI::SK_Reverse, MVT::v16i16,   { 2, 2, 2, 2 } }, // vpermw
-    { TTI::SK_Reverse, MVT::v64i8,    { 2, 2, 2, 2 } }, // pshufb + vshufi64x2
-
-    { TTI::SK_PermuteSingleSrc, MVT::v32i16, { 2, 2, 2, 2 } }, // vpermw
-    { TTI::SK_PermuteSingleSrc, MVT::v32f16, { 2, 2, 2, 2 } }, // vpermw
-    { TTI::SK_PermuteSingleSrc, MVT::v16i16, { 2, 2, 2, 2 } }, // vpermw
-    { TTI::SK_PermuteSingleSrc, MVT::v16f16, { 2, 2, 2, 2 } }, // vpermw
-    { TTI::SK_PermuteSingleSrc, MVT::v64i8,  { 8, 8, 8, 8 } },  // extend to v32i16
-
-    { TTI::SK_PermuteTwoSrc, MVT::v32i16,{  2,  2,  2,  2 } }, // vpermt2w
-    { TTI::SK_PermuteTwoSrc, MVT::v32f16,{  2,  2,  2,  2 } }, // vpermt2w
-    { TTI::SK_PermuteTwoSrc, MVT::v16i16,{  2,  2,  2,  2 } }, // vpermt2w
-    { TTI::SK_PermuteTwoSrc, MVT::v8i16, {  2,  2,  2,  2 } },  // vpermt2w
-    { TTI::SK_PermuteTwoSrc, MVT::v64i8, { 19, 19, 19, 19 } }, // 6 * v32i8 + 1
-
-    { TTI::SK_Select, MVT::v32i16, { 1, 1, 1, 1 } }, // vblendmw
-    { TTI::SK_Select, MVT::v64i8,  { 1, 1, 1, 1 } }, // vblendmb
-
-    { TTI::SK_Splice, MVT::v32i16, { 2, 2, 2, 2 } }, // vshufi64x2 + palignr
-    { TTI::SK_Splice, MVT::v32f16, { 2, 2, 2, 2 } }, // vshufi64x2 + palignr
-    { TTI::SK_Splice, MVT::v64i8,  { 2, 2, 2, 2 } }, // vshufi64x2 + palignr
+      {TTI::SK_Broadcast, MVT::v32i16, {1, 1, 1, 1}}, // vpbroadcastw
+      {TTI::SK_Broadcast, MVT::v32f16, {1, 1, 1, 1}}, // vpbroadcastw
+      {TTI::SK_Broadcast, MVT::v64i8, {1, 1, 1, 1}},  // vpbroadcastb
+
+      {TTI::SK_Reverse, MVT::v32i16, {2, 2, 2, 2}}, // vpermw
+      {TTI::SK_Reverse, MVT::v32f16, {2, 2, 2, 2}}, // vpermw
+      {TTI::SK_Reverse, MVT::v16i16, {2, 2, 2, 2}}, // vpermw
+      {TTI::SK_Reverse, MVT::v64i8, {2, 2, 2, 2}},  // pshufb + vshufi64x2
+
+      {TTI::SK_PermuteSingleSrc, MVT::v32i16, {2, 2, 2, 2}}, // vpermw
+      {TTI::SK_PermuteSingleSrc, MVT::v32f16, {2, 2, 2, 2}}, // vpermw
+      {TTI::SK_PermuteSingleSrc, MVT::v16i16, {2, 2, 2, 2}}, // vpermw
+      {TTI::SK_PermuteSingleSrc, MVT::v16f16, {2, 2, 2, 2}}, // vpermw
+      {TTI::SK_PermuteSingleSrc, MVT::v64i8, {8, 8, 8, 8}},  // extend to v32i16
+
+      {TTI::SK_PermuteTwoSrc, MVT::v32i16, {2, 2, 2, 2}},    // vpermt2w
+      {TTI::SK_PermuteTwoSrc, MVT::v32f16, {2, 2, 2, 2}},    // vpermt2w
+      {TTI::SK_PermuteTwoSrc, MVT::v16i16, {2, 2, 2, 2}},    // vpermt2w
+      {TTI::SK_PermuteTwoSrc, MVT::v8i16, {2, 2, 2, 2}},     // vpermt2w
+      {TTI::SK_PermuteTwoSrc, MVT::v64i8, {19, 19, 19, 19}}, // 6 * v32i8 + 1
+
+      {TTI::SK_Select, MVT::v32i16, {1, 1, 1, 1}}, // vblendmw
+      {TTI::SK_Select, MVT::v64i8, {1, 1, 1, 1}},  // vblendmb
+
+      {TTI::SK_Splice, MVT::v32i16, {2, 2, 2, 2}}, // vshufi64x2 + palignr
+      {TTI::SK_Splice, MVT::v32f16, {2, 2, 2, 2}}, // vshufi64x2 + palignr
+      {TTI::SK_Splice, MVT::v64i8, {2, 2, 2, 2}},  // vshufi64x2 + palignr
   };
 
   if (ST->hasBWI())
@@ -1936,17 +1936,17 @@ InstructionCost X86TTIImpl::getShuffleCost(
         return LT.first * *KindCost;
 
   static const CostKindTblEntry AVX2InLaneShuffleTbl[] = {
-    { TTI::SK_PermuteSingleSrc, MVT::v16i16, { 1, 1, 1, 1 } }, // vpshufb
-    { TTI::SK_PermuteSingleSrc, MVT::v16f16, { 1, 1, 1, 1 } }, // vpshufb
-    { TTI::SK_PermuteSingleSrc, MVT::v32i8,  { 1, 1, 1, 1 } }, // vpshufb
-
-    { TTI::SK_PermuteTwoSrc,    MVT::v4f64,  { 2, 2, 2, 2 } }, // 2*vshufpd + vblendpd
-    { TTI::SK_PermuteTwoSrc,    MVT::v8f32,  { 2, 2, 2, 2 } }, // 2*vshufps + vblendps
-    { TTI::SK_PermuteTwoSrc,    MVT::v4i64,  { 2, 2, 2, 2 } }, // 2*vpshufd + vpblendd
-    { TTI::SK_PermuteTwoSrc,    MVT::v8i32,  { 2, 2, 2, 2 } }, // 2*vpshufd + vpblendd
-    { TTI::SK_PermuteTwoSrc,    MVT::v16i16, { 2, 2, 2, 2 } }, // 2*vpshufb + vpor
-    { TTI::SK_PermuteTwoSrc,    MVT::v16f16, { 2, 2, 2, 2 } }, // 2*vpshufb + vpor
-    { TTI::SK_PermuteTwoSrc,    MVT::v32i8,  { 2, 2, 2, 2 } }, // 2*vpshufb + vpor
+      {TTI::SK_PermuteSingleSrc, MVT::v16i16, {1, 1, 1, 1}}, // vpshufb
+      {TTI::SK_PermuteSingleSrc, MVT::v16f16, {1, 1, 1, 1}}, // vpshufb
+      {TTI::SK_PermuteSingleSrc, MVT::v32i8, {1, 1, 1, 1}},  // vpshufb
+
+      {TTI::SK_PermuteTwoSrc, MVT::v4f64, {2, 2, 2, 2}}, // 2*vshufpd + vblendpd
+      {TTI::SK_PermuteTwoSrc, MVT::v8f32, {2, 2, 2, 2}}, // 2*vshufps + vblendps
+      {TTI::SK_PermuteTwoSrc, MVT::v4i64, {2, 2, 2, 2}}, // 2*vpshufd + vpblendd
+      {TTI::SK_PermuteTwoSrc, MVT::v8i32, {2, 2, 2, 2}}, // 2*vpshufd + vpblendd
+      {TTI::SK_PermuteTwoSrc, MVT::v16i16, {2, 2, 2, 2}}, // 2*vpshufb + vpor
+      {TTI::SK_PermuteTwoSrc, MVT::v16f16, {2, 2, 2, 2}}, // 2*vpshufb + vpor
+      {TTI::SK_PermuteTwoSrc, MVT::v32i8, {2, 2, 2, 2}},  // 2*vpshufb + vpor
   };
 
   if (IsInLaneShuffle && ST->hasAVX2())
@@ -1956,47 +1956,47 @@ InstructionCost X86TTIImpl::getShuffleCost(
         return LT.first * *KindCost;
 
   static const CostKindTblEntry AVX2ShuffleTbl[] = {
-    { TTI::SK_Broadcast, MVT::v4f64,  { 1, 1, 1, 1 } }, // vbroadcastpd
-    { TTI::SK_Broadcast, MVT::v8f32,  { 1, 1, 1, 1 } }, // vbroadcastps
-    { TTI::SK_Broadcast, MVT::v4i64,  { 1, 1, 1, 1 } }, // vpbroadcastq
-    { TTI::SK_Broadcast, MVT::v8i32,  { 1, 1, 1, 1 } }, // vpbroadcastd
-    { TTI::SK_Broadcast, MVT::v16i16, { 1, 1, 1, 1 } }, // vpbroadcastw
-    { TTI::SK_Broadcast, MVT::v16f16, { 1, 1, 1, 1 } }, // vpbroadcastw
-    { TTI::SK_Broadcast, MVT::v32i8,  { 1, 1, 1, 1 } }, // vpbroadcastb
-
-    { TTI::SK_Reverse, MVT::v4f64,    { 1, 1, 1, 1 } }, // vpermpd
-    { TTI::SK_Reverse, MVT::v8f32,    { 1, 1, 1, 1 } }, // vpermps
-    { TTI::SK_Reverse, MVT::v4i64,    { 1, 1, 1, 1 } }, // vpermq
-    { TTI::SK_Reverse, MVT::v8i32,    { 1, 1, 1, 1 } }, // vpermd
-    { TTI::SK_Reverse, MVT::v16i16,   { 2, 2, 2, 2 } }, // vperm2i128 + pshufb
-    { TTI::SK_Reverse, MVT::v16f16,   { 2, 2, 2, 2 } }, // vperm2i128 + pshufb
-    { TTI::SK_Reverse, MVT::v32i8,    { 2, 2, 2, 2 } }, // vperm2i128 + pshufb
-
-    { TTI::SK_Select, MVT::v16i16,    { 1, 1, 1, 1 } }, // vpblendvb
-    { TTI::SK_Select, MVT::v16f16,    { 1, 1, 1, 1 } }, // vpblendvb
-    { TTI::SK_Select, MVT::v32i8,     { 1, 1, 1, 1 } }, // vpblendvb
-
-    { TTI::SK_Splice, MVT::v8i32,     { 2, 2, 2, 2 } }, // vperm2i128 + vpalignr
-    { TTI::SK_Splice, MVT::v8f32,     { 2, 2, 2, 2 } }, // vperm2i128 + vpalignr
-    { TTI::SK_Splice, MVT::v16i16,    { 2, 2, 2, 2 } }, // vperm2i128 + vpalignr
-    { TTI::SK_Splice, MVT::v16f16,    { 2, 2, 2, 2 } }, // vperm2i128 + vpalignr
-    { TTI::SK_Splice, MVT::v32i8,     { 2, 2, 2, 2 } }, // vperm2i128 + vpalignr
-
-    { TTI::SK_PermuteSingleSrc, MVT::v4f64,  { 1, 1, 1, 1 } }, // vpermpd
-    { TTI::SK_PermuteSingleSrc, MVT::v8f32,  { 1, 1, 1, 1 } }, // vpermps
-    { TTI::SK_PermuteSingleSrc, MVT::v4i64,  { 1, 1, 1, 1 } }, // vpermq
-    { TTI::SK_PermuteSingleSrc, MVT::v8i32,  { 1, 1, 1, 1 } }, // vpermd
-    { TTI::SK_PermuteSingleSrc, MVT::v16i16, { 4, 4, 4, 4 } },
-    { TTI::SK_PermuteSingleSrc, MVT::v16f16, { 4, 4, 4, 4 } },
-    { TTI::SK_PermuteSingleSrc, MVT::v32i8,  { 4, 4, 4, 4 } },
-
-    { TTI::SK_PermuteTwoSrc, MVT::v4f64,  { 3, 3, 3, 3 } }, // 2*vpermpd + vblendpd
-    { TTI::SK_PermuteTwoSrc, MVT::v8f32,  { 3, 3, 3, 3 } }, // 2*vpermps + vblendps
-    { TTI::SK_PermuteTwoSrc, MVT::v4i64,  { 3, 3, 3, 3 } }, // 2*vpermq + vpblendd
-    { TTI::SK_PermuteTwoSrc, MVT::v8i32,  { 3, 3, 3, 3 } }, // 2*vpermd + vpblendd
-    { TTI::SK_PermuteTwoSrc, MVT::v16i16, { 7, 7, 7, 7 } },
-    { TTI::SK_PermuteTwoSrc, MVT::v16f16, { 7, 7, 7, 7 } },
-    { TTI::SK_PermuteTwoSrc, MVT::v32i8,  { 7, 7, 7, 7 } },
+      {TTI::SK_Broadcast, MVT::v4f64, {1, 1, 1, 1}},  // vbroadcastpd
+      {TTI::SK_Broadcast, MVT::v8f32, {1, 1, 1, 1}},  // vbroadcastps
+      {TTI::SK_Broadcast, MVT::v4i64, {1, 1, 1, 1}},  // vpbroadcastq
+      {TTI::SK_Broadcast, MVT::v8i32, {1, 1, 1, 1}},  // vpbroadcastd
+      {TTI::SK_Broadcast, MVT::v16i16, {1, 1, 1, 1}}, // vpbroadcastw
+      {TTI::SK_Broadcast, MVT::v16f16, {1, 1, 1, 1}}, // vpbroadcastw
+      {TTI::SK_Broadcast, MVT::v32i8, {1, 1, 1, 1}},  // vpbroadcastb
+
+      {TTI::SK_Reverse, MVT::v4f64, {1, 1, 1, 1}},  // vpermpd
+      {TTI::SK_Reverse, MVT::v8f32, {1, 1, 1, 1}},  // vpermps
+      {TTI::SK_Reverse, MVT::v4i64, {1, 1, 1, 1}},  // vpermq
+      {TTI::SK_Reverse, MVT::v8i32, {1, 1, 1, 1}},  // vpermd
+      {TTI::SK_Reverse, MVT::v16i16, {2, 2, 2, 2}}, // vperm2i128 + pshufb
+      {TTI::SK_Reverse, MVT::v16f16, {2, 2, 2, 2}}, // vperm2i128 + pshufb
+      {TTI::SK_Reverse, MVT::v32i8, {2, 2, 2, 2}},  // vperm2i128 + pshufb
+
+      {TTI::SK_Select, MVT::v16i16, {1, 1, 1, 1}}, // vpblendvb
+      {TTI::SK_Select, MVT::v16f16, {1, 1, 1, 1}}, // vpblendvb
+      {TTI::SK_Select, MVT::v32i8, {1, 1, 1, 1}},  // vpblendvb
+
+      {TTI::SK_Splice, MVT::v8i32, {2, 2, 2, 2}},  // vperm2i128 + vpalignr
+      {TTI::SK_Splice, MVT::v8f32, {2, 2, 2, 2}},  // vperm2i128 + vpalignr
+      {TTI::SK_Splice, MVT::v16i16, {2, 2, 2, 2}}, // vperm2i128 + vpalignr
+      {TTI::SK_Splice, MVT::v16f16, {2, 2, 2, 2}}, // vperm2i128 + vpalignr
+      {TTI::SK_Splice, MVT::v32i8, {2, 2, 2, 2}},  // vperm2i128 + vpalignr
+
+      {TTI::SK_PermuteSingleSrc, MVT::v4f64, {1, 1, 1, 1}}, // vpermpd
+      {TTI::SK_PermuteSingleSrc, MVT::v8f32, {1, 1, 1, 1}}, // vpermps
+      {TTI::SK_PermuteSingleSrc, MVT::v4i64, {1, 1, 1, 1}}, // vpermq
+      {TTI::SK_PermuteSingleSrc, MVT::v8i32, {1, 1, 1, 1}}, // vpermd
+      {TTI::SK_PermuteSingleSrc, MVT::v16i16, {4, 4, 4, 4}},
+      {TTI::SK_PermuteSingleSrc, MVT::v16f16, {4, 4, 4, 4}},
+      {TTI::SK_PermuteSingleSrc, MVT::v32i8, {4, 4, 4, 4}},
+
+      {TTI::SK_PermuteTwoSrc, MVT::v4f64, {3, 3, 3, 3}}, // 2*vpermpd + vblendpd
+      {TTI::SK_PermuteTwoSrc, MVT::v8f32, {3, 3, 3, 3}}, // 2*vpermps + vblendps
+      {TTI::SK_PermuteTwoSrc, MVT::v4i64, {3, 3, 3, 3}}, // 2*vpermq + vpblendd
+      {TTI::SK_PermuteTwoSrc, MVT::v8i32, {3, 3, 3, 3}}, // 2*vpermd + vpblendd
+      {TTI::SK_PermuteTwoSrc, MVT::v16i16, {7, 7, 7, 7}},
+      {TTI::SK_PermuteTwoSrc, MVT::v16f16, {7, 7, 7, 7}},
+      {TTI::SK_PermuteTwoSrc, MVT::v32i8, {7, 7, 7, 7}},
   };
 
   if (ST->hasAVX2())
@@ -2005,22 +2005,38 @@ InstructionCost X86TTIImpl::getShuffleCost(
         return LT.first * *KindCost;
 
   static const CostKindTblEntry XOPShuffleTbl[] = {
-    { TTI::SK_PermuteSingleSrc, MVT::v4f64, { 2, 2, 2, 2 } }, // vperm2f128 + vpermil2pd
-    { TTI::SK_PermuteSingleSrc, MVT::v8f32, { 2, 2, 2, 2 } }, // vperm2f128 + vpermil2ps
-    { TTI::SK_PermuteSingleSrc, MVT::v4i64, { 2, 2, 2, 2 } }, // vperm2f128 + vpermil2pd
-    { TTI::SK_PermuteSingleSrc, MVT::v8i32, { 2, 2, 2, 2 } }, // vperm2f128 + vpermil2ps
-    { TTI::SK_PermuteSingleSrc, MVT::v16i16,{ 4, 4, 4, 4 } }, // vextractf128 + 2*vpperm
-                                                             // + vinsertf128
-    { TTI::SK_PermuteSingleSrc, MVT::v32i8, { 4, 4, 4, 4 } }, // vextractf128 + 2*vpperm
-                                                             // + vinsertf128
-
-    { TTI::SK_PermuteTwoSrc, MVT::v16i16, { 9, 9, 9, 9 } }, // 2*vextractf128 + 6*vpperm
-                                                            // + vinsertf128
-
-    { TTI::SK_PermuteTwoSrc, MVT::v8i16,  { 1, 1, 1, 1 } }, // vpperm
-    { TTI::SK_PermuteTwoSrc, MVT::v32i8,  { 9, 9, 9, 9 } }, // 2*vextractf128 + 6*vpperm
-                                                            // + vinsertf128
-    { TTI::SK_PermuteTwoSrc, MVT::v16i8,  { 1, 1, 1, 1 } }, // vpperm
+      {TTI::SK_PermuteSingleSrc,
+       MVT::v4f64,
+       {2, 2, 2, 2}}, // vperm2f128 + vpermil2pd
+      {TTI::SK_PermuteSingleSrc,
+       MVT::v8f32,
+       {2, 2, 2, 2}}, // vperm2f128 + vpermil2ps
+      {TTI::SK_PermuteSingleSrc,
+       MVT::v4i64,
+       {2, 2, 2, 2}}, // vperm2f128 + vpermil2pd
+      {TTI::SK_PermuteSingleSrc,
+       MVT::v8i32,
+       {2, 2, 2, 2}}, // vperm2f128 + vpermil2ps
+      {TTI::SK_PermuteSingleSrc,
+       MVT::v16i16,
+       {4, 4, 4, 4}}, // vextractf128 + 2*vpperm
+                      // + vinsertf128
+      {TTI::SK_PermuteSingleSrc,
+       MVT::v32i8,
+       {4, 4, 4, 4}}, // vextractf128 + 2*vpperm
+                      // + vinsertf128
+
+      {TTI::SK_PermuteTwoSrc,
+       MVT::v16i16,
+       {9, 9, 9, 9}}, // 2*vextractf128 + 6*vpperm
+                      // + vinsertf128
+
+      {TTI::SK_PermuteTwoSrc, MVT::v8i16, {1, 1, 1, 1}}, // vpperm
+      {TTI::SK_PermuteTwoSrc,
+       MVT::v32i8,
+       {9, 9, 9, 9}}, // 2*vextractf128 + 6*vpperm
+                      // + vinsertf128
+      {TTI::SK_PermuteTwoSrc, MVT::v16i8, {1, 1, 1, 1}}, // vpperm
   };
 
   if (ST->hasXOP())
@@ -2029,28 +2045,44 @@ InstructionCost X86TTIImpl::getShuffleCost(
         return LT.first * *KindCost;
 
   static const CostKindTblEntry AVX1InLaneShuffleTbl[] = {
-    { TTI::SK_PermuteSingleSrc, MVT::v4f64,  { 1, 1, 1, 1 } }, // vpermilpd
-    { TTI::SK_PermuteSingleSrc, MVT::v4i64,  { 1, 1, 1, 1 } }, // vpermilpd
-    { TTI::SK_PermuteSingleSrc, MVT::v8f32,  { 1, 1, 1, 1 } }, // vpermilps
-    { TTI::SK_PermuteSingleSrc, MVT::v8i32,  { 1, 1, 1, 1 } }, // vpermilps
-
-    { TTI::SK_PermuteSingleSrc, MVT::v16i16, { 4, 4, 4, 4 } }, // vextractf128 + 2*pshufb
-                                                               // + vpor + vinsertf128
-    { TTI::SK_PermuteSingleSrc, MVT::v16f16, { 4, 4, 4, 4 } }, // vextractf128 + 2*pshufb
-                                                               // + vpor + vinsertf128
-    { TTI::SK_PermuteSingleSrc, MVT::v32i8,  { 4, 4, 4, 4 } }, // vextractf128 + 2*pshufb
-                                                               // + vpor + vinsertf128
-
-    { TTI::SK_PermuteTwoSrc, MVT::v4f64,  { 2, 2, 2, 2 } }, // 2*vshufpd + vblendpd
-    { TTI::SK_PermuteTwoSrc, MVT::v8f32,  { 2, 2, 2, 2 } }, // 2*vshufps + vblendps
-    { TTI::SK_PermuteTwoSrc, MVT::v4i64,  { 2, 2, 2, 2 } }, // 2*vpermilpd + vblendpd
-    { TTI::SK_PermuteTwoSrc, MVT::v8i32,  { 2, 2, 2, 2 } }, // 2*vpermilps + vblendps
-    { TTI::SK_PermuteTwoSrc, MVT::v16i16, { 9, 9, 9, 9 } }, // 2*vextractf128 + 4*pshufb
-                                                            // + 2*vpor + vinsertf128
-    { TTI::SK_PermuteTwoSrc, MVT::v16f16, { 9, 9, 9, 9 } }, // 2*vextractf128 + 4*pshufb
-                                                            // + 2*vpor + vinsertf128
-    { TTI::SK_PermuteTwoSrc, MVT::v32i8,  { 9, 9, 9, 9 } }, // 2*vextractf128 + 4*pshufb
-                                                            // + 2*vpor + vinsertf128
+      {TTI::SK_PermuteSingleSrc, MVT::v4f64, {1, 1, 1, 1}}, // vpermilpd
+      {TTI::SK_PermuteSingleSrc, MVT::v4i64, {1, 1, 1, 1}}, // vpermilpd
+      {TTI::SK_PermuteSingleSrc, MVT::v8f32, {1, 1, 1, 1}}, // vpermilps
+      {TTI::SK_PermuteSingleSrc, MVT::v8i32, {1, 1, 1, 1}}, // vpermilps
+
+      {TTI::SK_PermuteSingleSrc,
+       MVT::v16i16,
+       {4, 4, 4, 4}}, // vextractf128 + 2*pshufb
+                      // + vpor + vinsertf128
+      {TTI::SK_PermuteSingleSrc,
+       MVT::v16f16,
+       {4, 4, 4, 4}}, // vextractf128 + 2*pshufb
+                      // + vpor + vinsertf128
+      {TTI::SK_PermuteSingleSrc,
+       MVT::v32i8,
+       {4, 4, 4, 4}}, // vextractf128 + 2*pshufb
+                      // + vpor + vinsertf128
+
+      {TTI::SK_PermuteTwoSrc, MVT::v4f64, {2, 2, 2, 2}}, // 2*vshufpd + vblendpd
+      {TTI::SK_PermuteTwoSrc, MVT::v8f32, {2, 2, 2, 2}}, // 2*vshufps + vblendps
+      {TTI::SK_PermuteTwoSrc,
+       MVT::v4i64,
+       {2, 2, 2, 2}}, // 2*vpermilpd + vblendpd
+      {TTI::SK_PermuteTwoSrc,
+       MVT::v8i32,
+       {2, 2, 2, 2}}, // 2*vpermilps + vblendps
+      {TTI::SK_PermuteTwoSrc,
+       MVT::v16i16,
+       {9, 9, 9, 9}}, // 2*vextractf128 + 4*pshufb
+                      // + 2*vpor + vinsertf128
+      {TTI::SK_PermuteTwoSrc,
+       MVT::v16f16,
+       {9, 9, 9, 9}}, // 2*vextractf128 + 4*pshufb
+                      // + 2*vpor + vinsertf128
+      {TTI::SK_PermuteTwoSrc,
+       MVT::v32i8,
+       {9, 9, 9, 9}}, // 2*vextractf128 + 4*pshufb
+                      // + 2*vpor + vinsertf128
   };
 
   if (IsInLaneShuffle && ST->hasAVX())
@@ -2060,62 +2092,100 @@ InstructionCost X86TTIImpl::getShuffleCost(
         return LT.first * *KindCost;
 
   static const CostKindTblEntry AVX1ShuffleTbl[] = {
-      {TTI::SK_Broadcast, MVT::v4f64,  {2,2,2,2}}, // vperm2f128 + vpermilpd
-      {TTI::SK_Broadcast, MVT::v8f32,  {2,2,2,2}}, // vperm2f128 + vpermilps
-      {TTI::SK_Broadcast, MVT::v4i64,  {2,2,2,2}}, // vperm2f128 + vpermilpd
-      {TTI::SK_Broadcast, MVT::v8i32,  {2,2,2,2}}, // vperm2f128 + vpermilps
-      {TTI::SK_Broadcast, MVT::v16i16, {3,3,3,3}}, // vpshuflw + vpshufd + vinsertf128
-      {TTI::SK_Broadcast, MVT::v16f16, {3,3,3,3}}, // vpshuflw + vpshufd + vinsertf128
-      {TTI::SK_Broadcast, MVT::v32i8,  {2,2,2,2}}, // vpshufb + vinsertf128
-
-      {TTI::SK_Reverse, MVT::v4f64,  {2,2,2,2}}, // vperm2f128 + vpermilpd
-      {TTI::SK_Reverse, MVT::v8f32,  {2,2,2,2}}, // vperm2f128 + vpermilps
-      {TTI::SK_Reverse, MVT::v4i64,  {2,2,2,2}}, // vperm2f128 + vpermilpd
-      {TTI::SK_Reverse, MVT::v8i32,  {2,2,2,2}}, // vperm2f128 + vpermilps
-      {TTI::SK_Reverse, MVT::v16i16, {4,4,4,4}}, // vextractf128 + 2*pshufb
-                                                 // + vinsertf128
-      {TTI::SK_Reverse, MVT::v16f16, {4,4,4,4}}, // vextractf128 + 2*pshufb
-                                                 // + vinsertf128
-      {TTI::SK_Reverse, MVT::v32i8,  {4,4,4,4}}, // vextractf128 + 2*pshufb
-                                                 // + vinsertf128
-
-      {TTI::SK_Select, MVT::v4i64,  {1,1,1,1}}, // vblendpd
-      {TTI::SK_Select, MVT::v4f64,  {1,1,1,1}}, // vblendpd
-      {TTI::SK_Select, MVT::v8i32,  {1,1,1,1}}, // vblendps
-      {TTI::SK_Select, MVT::v8f32,  {1,1,1,1}}, // vblendps
-      {TTI::SK_Select, MVT::v16i16, {3,3,3,3}}, // vpand + vpandn + vpor
-      {TTI::SK_Select, MVT::v16f16, {3,3,3,3}}, // vpand + vpandn + vpor
-      {TTI::SK_Select, MVT::v32i8,  {3,3,3,3}}, // vpand + vpandn + vpor
-
-      {TTI::SK_Splice, MVT::v4i64,  {2,2,2,2}}, // vperm2f128 + shufpd
-      {TTI::SK_Splice, MVT::v4f64,  {2,2,2,2}}, // vperm2f128 + shufpd
-      {TTI::SK_Splice, MVT::v8i32,  {4,4,4,4}}, // 2*vperm2f128 + 2*vshufps
-      {TTI::SK_Splice, MVT::v8f32,  {4,4,4,4}}, // 2*vperm2f128 + 2*vshufps
-      {TTI::SK_Splice, MVT::v16i16, {5,5,5,5}}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
-      {TTI::SK_Splice, MVT::v16f16, {5,5,5,5}}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
-      {TTI::SK_Splice, MVT::v32i8,  {5,5,5,5}}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
-
-      {TTI::SK_PermuteSingleSrc, MVT::v4f64, {2,2,2,2}}, // vperm2f128 + vshufpd
-      {TTI::SK_PermuteSingleSrc, MVT::v4i64, {2,2,2,2}}, // vperm2f128 + vshufpd
-      {TTI::SK_PermuteSingleSrc, MVT::v8f32, {4,4,4,4}}, // 2*vperm2f128 + 2*vshufps
-      {TTI::SK_PermuteSingleSrc, MVT::v8i32, {4,4,4,4}}, // 2*vperm2f128 + 2*vshufps
-      {TTI::SK_PermuteSingleSrc, MVT::v16i16,{8,8,8,8}}, // vextractf128 + 4*pshufb
-                                                         // + 2*por + vinsertf128
-      {TTI::SK_PermuteSingleSrc, MVT::v16f16,{8,8,8,8}}, // vextractf128 + 4*pshufb
-                                                         // + 2*por + vinsertf128
-      {TTI::SK_PermuteSingleSrc, MVT::v32i8, {8,8,8,8}}, // vextractf128 + 4*pshufb
-                                                         // + 2*por + vinsertf128
-
-      {TTI::SK_PermuteTwoSrc, MVT::v4f64, {3,3,3,3}},   // 2*vperm2f128 + vshufpd
-      {TTI::SK_PermuteTwoSrc, MVT::v4i64, {3,3,3,3}},   // 2*vperm2f128 + vshufpd
-      {TTI::SK_PermuteTwoSrc, MVT::v8f32, {4,4,4,4}},   // 2*vperm2f128 + 2*vshufps
-      {TTI::SK_PermuteTwoSrc, MVT::v8i32, {4,4,4,4}},   // 2*vperm2f128 + 2*vshufps
-      {TTI::SK_PermuteTwoSrc, MVT::v16i16,{15,15,15,15}}, // 2*vextractf128 + 8*pshufb
-                                                          // + 4*por + vinsertf128
-      {TTI::SK_PermuteTwoSrc, MVT::v16f16,{15,15,15,15}}, // 2*vextractf128 + 8*pshufb
-                                                          // + 4*por + vinsertf128
-      {TTI::SK_PermuteTwoSrc, MVT::v32i8, {15,15,15,15}}, // 2*vextractf128 + 8*pshufb
-                                                         // + 4*por + vinsertf128
+      {TTI::SK_Broadcast, MVT::v4f64, {2, 2, 2, 2}}, // vperm2f128 + vpermilpd
+      {TTI::SK_Broadcast, MVT::v8f32, {2, 2, 2, 2}}, // vperm2f128 + vpermilps
+      {TTI::SK_Broadcast, MVT::v4i64, {2, 2, 2, 2}}, // vperm2f128 + vpermilpd
+      {TTI::SK_Broadcast, MVT::v8i32, {2, 2, 2, 2}}, // vperm2f128 + vpermilps
+      {TTI::SK_Broadcast,
+       MVT::v16i16,
+       {3, 3, 3, 3}}, // vpshuflw + vpshufd + vinsertf128
+      {TTI::SK_Broadcast,
+       MVT::v16f16,
+       {3, 3, 3, 3}}, // vpshuflw + vpshufd + vinsertf128
+      {TTI::SK_Broadcast, MVT::v32i8, {2, 2, 2, 2}}, // vpshufb + vinsertf128
+
+      {TTI::SK_Reverse, MVT::v4f64, {2, 2, 2, 2}},  // vperm2f128 + vpermilpd
+      {TTI::SK_Reverse, MVT::v8f32, {2, 2, 2, 2}},  // vperm2f128 + vpermilps
+      {TTI::SK_Reverse, MVT::v4i64, {2, 2, 2, 2}},  // vperm2f128 + vpermilpd
+      {TTI::SK_Reverse, MVT::v8i32, {2, 2, 2, 2}},  // vperm2f128 + vpermilps
+      {TTI::SK_Reverse, MVT::v16i16, {4, 4, 4, 4}}, // vextractf128 + 2*pshufb
+                                                    // + vinsertf128
+      {TTI::SK_Reverse, MVT::v16f16, {4, 4, 4, 4}}, // vextractf128 + 2*pshufb
+                                                    // + vinsertf128
+      {TTI::SK_Reverse, MVT::v32i8, {4, 4, 4, 4}},  // vextractf128 + 2*pshufb
+                                                    // + vinsertf128
+
+      {TTI::SK_Select, MVT::v4i64, {1, 1, 1, 1}},  // vblendpd
+      {TTI::SK_Select, MVT::v4f64, {1, 1, 1, 1}},  // vblendpd
+      {TTI::SK_Select, MVT::v8i32, {1, 1, 1, 1}},  // vblendps
+      {TTI::SK_Select, MVT::v8f32, {1, 1, 1, 1}},  // vblendps
+      {TTI::SK_Select, MVT::v16i16, {3, 3, 3, 3}}, // vpand + vpandn + vpor
+      {TTI::SK_Select, MVT::v16f16, {3, 3, 3, 3}}, // vpand + vpandn + vpor
+      {TTI::SK_Select, MVT::v32i8, {3, 3, 3, 3}},  // vpand + vpandn + vpor
+
+      {TTI::SK_Splice, MVT::v4i64, {2, 2, 2, 2}}, // vperm2f128 + shufpd
+      {TTI::SK_Splice, MVT::v4f64, {2, 2, 2, 2}}, // vperm2f128 + shufpd
+      {TTI::SK_Splice, MVT::v8i32, {4, 4, 4, 4}}, // 2*vperm2f128 + 2*vshufps
+      {TTI::SK_Splice, MVT::v8f32, {4, 4, 4, 4}}, // 2*vperm2f128 + 2*vshufps
+      {TTI::SK_Splice,
+       MVT::v16i16,
+       {5, 5, 5, 5}}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
+      {TTI::SK_Splice,
+       MVT::v16f16,
+       {5, 5, 5, 5}}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
+      {TTI::SK_Splice,
+       MVT::v32i8,
+       {5, 5, 5, 5}}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
+
+      {TTI::SK_PermuteSingleSrc,
+       MVT::v4f64,
+       {2, 2, 2, 2}}, // vperm2f128 + vshufpd
+      {TTI::SK_PermuteSingleSrc,
+       MVT::v4i64,
+       {2, 2, 2, 2}}, // vperm2f128 + vshufpd
+      {TTI::SK_PermuteSingleSrc,
+       MVT::v8f32,
+       {4, 4, 4, 4}}, // 2*vperm2f128 + 2*vshufps
+      {TTI::SK_PermuteSingleSrc,
+       MVT::v8i32,
+       {4, 4, 4, 4}}, // 2*vperm2f128 + 2*vshufps
+      {TTI::SK_PermuteSingleSrc,
+       MVT::v16i16,
+       {8, 8, 8, 8}}, // vextractf128 + 4*pshufb
+                      // + 2*por + vinsertf128
+      {TTI::SK_PermuteSingleSrc,
+       MVT::v16f16,
+       {8, 8, 8, 8}}, // vextractf128 + 4*pshufb
+                      // + 2*por + vinsertf128
+      {TTI::SK_PermuteSingleSrc,
+       MVT::v32i8,
+       {8, 8, 8, 8}}, // vextractf128 + 4*pshufb
+                      // + 2*por + vinsertf128
+
+      {TTI::SK_PermuteTwoSrc,
+       MVT::v4f64,
+       {3, 3, 3, 3}}, // 2*vperm2f128 + vshufpd
+      {TTI::SK_PermuteTwoSrc,
+       MVT::v4i64,
+       {3, 3, 3, 3}}, // 2*vperm2f128 + vshufpd
+      {TTI::SK_PermuteTwoSrc,
+       MVT::v8f32,
+       {4, 4, 4, 4}}, // 2*vperm2f128 + 2*vshufps
+      {TTI::SK_PermuteTwoSrc,
+       MVT::v8i32,
+       {4, 4, 4, 4}}, // 2*vperm2f128 + 2*vshufps
+      {TTI::SK_PermuteTwoSrc,
+       MVT::v16i16,
+       {15, 15, 15, 15}}, // 2*vextractf128 + 8*pshufb
+                          // + 4*por + vinsertf128
+      {TTI::SK_PermuteTwoSrc,
+       MVT::v16f16,
+       {15, 15, 15, 15}}, // 2*vextractf128 + 8*pshufb
+                          // + 4*por + vinsertf128
+      {TTI::SK_PermuteTwoSrc,
+       MVT::v32i8,
+       {15, 15, 15, 15}}, // 2*vextractf128 + 8*pshufb
+                          // + 4*por + vinsertf128
   };
 
   if (ST->hasAVX())
@@ -2124,13 +2194,13 @@ InstructionCost X86TTIImpl::getShuffleCost(
         return LT.first * *KindCost;
 
   static const CostKindTblEntry SSE41ShuffleTbl[] = {
-      {TTI::SK_Select, MVT::v2i64, {1,1,1,1}}, // pblendw
-      {TTI::SK_Select, MVT::v2f64, {1,1,1,1}}, // movsd
-      {TTI::SK_Select, MVT::v4i32, {1,1,1,1}}, // pblendw
-      {TTI::SK_Select, MVT::v4f32, {1,1,1,1}}, // blendps
-      {TTI::SK_Select, MVT::v8i16, {1,1,1,1}}, // pblendw
-      {TTI::SK_Select, MVT::v8f16, {1,1,1,1}}, // pblendw
-      {TTI::SK_Select, MVT::v16i8, {1,1,1,1}}  // pblendvb
+      {TTI::SK_Select, MVT::v2i64, {1, 1, 1, 1}}, // pblendw
+      {TTI::SK_Select, MVT::v2f64, {1, 1, 1, 1}}, // movsd
+      {TTI::SK_Select, MVT::v4i32, {1, 1, 1, 1}}, // pblendw
+      {TTI::SK_Select, MVT::v4f32, {1, 1, 1, 1}}, // blendps
+      {TTI::SK_Select, MVT::v8i16, {1, 1, 1, 1}}, // pblendw
+      {TTI::SK_Select, MVT::v8f16, {1, 1, 1, 1}}, // pblendw
+      {TTI::SK_Select, MVT::v16i8, {1, 1, 1, 1}}  // pblendvb
   };
 
   if (ST->hasSSE41())
@@ -2184,8 +2254,10 @@ InstructionCost X86TTIImpl::getShuffleCost(
       {TTI::SK_Reverse, MVT::v4i32, {1, 1, 1, 1}}, // pshufd
       {TTI::SK_Reverse, MVT::v8i16, {3, 3, 3, 3}}, // pshuflw + pshufhw + pshufd
       {TTI::SK_Reverse, MVT::v8f16, {3, 3, 3, 3}}, // pshuflw + pshufhw + pshufd
-      {TTI::SK_Reverse, MVT::v16i8, {9, 9, 9, 9}}, // 2*pshuflw + 2*pshufhw
-                                                   // + 2*pshufd + 2*unpck + packus
+      {TTI::SK_Reverse,
+       MVT::v16i8,
+       {9, 9, 9, 9}}, // 2*pshuflw + 2*pshufhw
+                      // + 2*pshufd + 2*unpck + packus
 
       {TTI::SK_Select, MVT::v2i64, {1, 1, 1, 1}}, // movsd
       {TTI::SK_Select, MVT::v2f64, {1, 1, 1, 1}}, // movsd
@@ -2204,16 +2276,24 @@ InstructionCost X86TTIImpl::getShuffleCost(
       {TTI::SK_PermuteSingleSrc, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
       {TTI::SK_PermuteSingleSrc, MVT::v2i64, {1, 1, 1, 1}}, // pshufd
       {TTI::SK_PermuteSingleSrc, MVT::v4i32, {1, 1, 1, 1}}, // pshufd
-      {TTI::SK_PermuteSingleSrc, MVT::v8i16, {5, 5, 5, 5}}, // 2*pshuflw + 2*pshufhw
-                                                            // + pshufd/unpck
-      {TTI::SK_PermuteSingleSrc, MVT::v8f16, {5, 5, 5, 5}}, // 2*pshuflw + 2*pshufhw
-                                                            // + pshufd/unpck
-      {TTI::SK_PermuteSingleSrc, MVT::v16i8, {10, 10, 10, 10}}, // 2*pshuflw + 2*pshufhw
-                                                                // + 2*pshufd + 2*unpck + 2*packus
-
-      {TTI::SK_PermuteTwoSrc, MVT::v2f64, {1, 1, 1, 1}},     // shufpd
-      {TTI::SK_PermuteTwoSrc, MVT::v2i64, {1, 1, 1, 1}},     // shufpd
-      {TTI::SK_PermuteTwoSrc, MVT::v4i32, {2, 2, 2, 2}},     // 2*{unpck,movsd,pshufd}
+      {TTI::SK_PermuteSingleSrc,
+       MVT::v8i16,
+       {5, 5, 5, 5}}, // 2*pshuflw + 2*pshufhw
+                      // + pshufd/unpck
+      {TTI::SK_PermuteSingleSrc,
+       MVT::v8f16,
+       {5, 5, 5, 5}}, // 2*pshuflw + 2*pshufhw
+                      // + pshufd/unpck
+      {TTI::SK_PermuteSingleSrc,
+       MVT::v16i8,
+       {10, 10, 10, 10}}, // 2*pshuflw + 2*pshufhw
+                          // + 2*pshufd + 2*unpck + 2*packus
+
+      {TTI::SK_PermuteTwoSrc, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
+      {TTI::SK_PermuteTwoSrc, MVT::v2i64, {1, 1, 1, 1}}, // shufpd
+      {TTI::SK_PermuteTwoSrc,
+       MVT::v4i32,
+       {2, 2, 2, 2}}, // 2*{unpck,movsd,pshufd}
       {TTI::SK_PermuteTwoSrc, MVT::v8i16, {8, 8, 8, 8}},     // blend+permute
       {TTI::SK_PermuteTwoSrc, MVT::v8f16, {8, 8, 8, 8}},     // blend+permute
       {TTI::SK_PermuteTwoSrc, MVT::v16i8, {13, 13, 13, 13}}, // blend+permute
@@ -2241,12 +2321,12 @@ InstructionCost X86TTIImpl::getShuffleCost(
   }
 
   static const CostKindTblEntry SSE1ShuffleTbl[] = {
-    { TTI::SK_Broadcast,        MVT::v4f32, {1,1,1,1} }, // shufps
-    { TTI::SK_Reverse,          MVT::v4f32, {1,1,1,1} }, // shufps
-    { TTI::SK_Select,           MVT::v4f32, {2,2,2,2} }, // 2*shufps
-    { TTI::SK_Splice,           MVT::v4f32, {2,2,2,2} }, // 2*shufps
-    { TTI::SK_PermuteSingleSrc, MVT::v4f32, {1,1,1,1} }, // shufps
-    { TTI::SK_PermuteTwoSrc,    MVT::v4f32, {2,2,2,2} }, // 2*shufps
+      {TTI::SK_Broadcast, MVT::v4f32, {1, 1, 1, 1}},        // shufps
+      {TTI::SK_Reverse, MVT::v4f32, {1, 1, 1, 1}},          // shufps
+      {TTI::SK_Select, MVT::v4f32, {2, 2, 2, 2}},           // 2*shufps
+      {TTI::SK_Splice, MVT::v4f32, {2, 2, 2, 2}},           // 2*shufps
+      {TTI::SK_PermuteSingleSrc, MVT::v4f32, {1, 1, 1, 1}}, // shufps
+      {TTI::SK_PermuteTwoSrc, MVT::v4f32, {2, 2, 2, 2}},    // 2*shufps
   };
 
   if (ST->hasSSE1()) {

``````````

</details>


https://github.com/llvm/llvm-project/pull/124753


More information about the llvm-commits mailing list