[llvm] [CostModel][X86] Reduce worst case v8i16/v16i8 SSE2 shuffle costs (PR #124789)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 28 08:56:40 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
These were based off instruction count, not throughput - we can probably improve these further, but these throughput numbers match the worse expanded shuffles we see in the vector-shuffle-128-v* codegen tests.
---
Patch is 123.33 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/124789.diff
15 Files Affected:
- (modified) llvm/lib/Target/X86/X86TargetTransformInfo.cpp (+7-7)
- (modified) llvm/test/Analysis/CostModel/X86/reduce-xor.ll (+5-5)
- (modified) llvm/test/Analysis/CostModel/X86/reduction.ll (+6-6)
- (modified) llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll (+6-6)
- (modified) llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll (+23-23)
- (modified) llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll (+7-7)
- (modified) llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll (+6-6)
- (modified) llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll (+7-7)
- (modified) llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll (+52-52)
- (modified) llvm/test/Analysis/CostModel/X86/vector-insert-value.ll (+72-72)
- (modified) llvm/test/Analysis/CostModel/X86/vector-insert.ll (+52-52)
- (modified) llvm/test/Transforms/PhaseOrdering/X86/hadd.ll (+4-7)
- (modified) llvm/test/Transforms/PhaseOrdering/X86/hsub.ll (+4-7)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/load-partial-vector-shuffle.ll (+3-11)
- (modified) llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll (+6-14)
``````````diff
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 82523bb6557adc..f6552c500d7cfb 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2204,19 +2204,19 @@ InstructionCost X86TTIImpl::getShuffleCost(
{TTI::SK_PermuteSingleSrc, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
{TTI::SK_PermuteSingleSrc, MVT::v2i64, {1, 1, 1, 1}}, // pshufd
{TTI::SK_PermuteSingleSrc, MVT::v4i32, {1, 1, 1, 1}}, // pshufd
- {TTI::SK_PermuteSingleSrc, MVT::v8i16, {5, 5, 5, 5}}, // 2*pshuflw + 2*pshufhw
+ {TTI::SK_PermuteSingleSrc, MVT::v8i16, {3, 5, 5, 5}}, // 2*pshuflw + 2*pshufhw
// + pshufd/unpck
- {TTI::SK_PermuteSingleSrc, MVT::v8f16, {5, 5, 5, 5}}, // 2*pshuflw + 2*pshufhw
+ {TTI::SK_PermuteSingleSrc, MVT::v8f16, {3, 5, 5, 5}}, // 2*pshuflw + 2*pshufhw
// + pshufd/unpck
- {TTI::SK_PermuteSingleSrc, MVT::v16i8, {10, 10, 10, 10}}, // 2*pshuflw + 2*pshufhw
- // + 2*pshufd + 2*unpck + 2*packus
+ {TTI::SK_PermuteSingleSrc, MVT::v16i8, {8, 10, 10, 10}}, // 2*pshuflw + 2*pshufhw
+ // + 2*pshufd + 2*unpck + 2*packus
{TTI::SK_PermuteTwoSrc, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
{TTI::SK_PermuteTwoSrc, MVT::v2i64, {1, 1, 1, 1}}, // shufpd
{TTI::SK_PermuteTwoSrc, MVT::v4i32, {2, 2, 2, 2}}, // 2*{unpck,movsd,pshufd}
- {TTI::SK_PermuteTwoSrc, MVT::v8i16, {8, 8, 8, 8}}, // blend+permute
- {TTI::SK_PermuteTwoSrc, MVT::v8f16, {8, 8, 8, 8}}, // blend+permute
- {TTI::SK_PermuteTwoSrc, MVT::v16i8, {13, 13, 13, 13}}, // blend+permute
+ {TTI::SK_PermuteTwoSrc, MVT::v8i16, {6, 8, 8, 8}}, // blend+permute
+ {TTI::SK_PermuteTwoSrc, MVT::v8f16, {6, 8, 8, 8}}, // blend+permute
+ {TTI::SK_PermuteTwoSrc, MVT::v16i8, {11, 13, 13, 13}}, // blend+permute
};
static const CostTblEntry SSE3BroadcastLoadTbl[] = {
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-xor.ll b/llvm/test/Analysis/CostModel/X86/reduce-xor.ll
index f1c8bcda65bd13..757db4a5a41bec 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-xor.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-xor.ll
@@ -157,11 +157,11 @@ define i32 @reduce_i1(i32 %arg) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i1'
diff --git a/llvm/test/Analysis/CostModel/X86/reduction.ll b/llvm/test/Analysis/CostModel/X86/reduction.ll
index 5ff3920c638749..4ad0887a27884d 100644
--- a/llvm/test/Analysis/CostModel/X86/reduction.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduction.ll
@@ -634,9 +634,9 @@ define fastcc i64 @no_pairwise_reduction4i64(<4 x i64> %rdx, i64 %f1) {
define fastcc i16 @no_pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) {
; SSE2-LABEL: 'no_pairwise_reduction8i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx4 = add <8 x i16> %rdx, %rdx.shuf3
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %bin.rdx4, %rdx.shuf
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %bin.rdx, %rdx.shuf7
@@ -1126,11 +1126,11 @@ define fastcc i64 @pairwise_reduction4i64(<4 x i64> %rdx, i64 %f1) {
define fastcc i16 @pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) {
; SSE2-LABEL: 'pairwise_reduction8i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %rdx.shuf.0.0, %rdx.shuf.0.1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %rdx.shuf.1.0, %rdx.shuf.1.1
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll
index e91db1b0cb14a3..f0536f96941ac5 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll
@@ -251,7 +251,7 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256,
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 12, i32 13>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
@@ -275,7 +275,7 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256,
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 28, i32 29>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 30, i32 31>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
@@ -899,9 +899,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 12, i32 13>
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -923,9 +923,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 28, i32 29>
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 30, i32 31>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll
index bc3af51914499a..a9443cfe0f97bd 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll
@@ -527,23 +527,23 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/124789
More information about the llvm-commits
mailing list