[llvm] eb2b453 - [VectorCombine] foldInsExtVectorToShuffle - ensure we call getShuffleCost with the input operand type, not the result

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 6 09:43:31 PST 2025


Author: Simon Pilgrim
Date: 2025-02-06T17:41:24Z
New Revision: eb2b453eb73dfe0b8cf516dfb4d568c0b858d1fa

URL: https://github.com/llvm/llvm-project/commit/eb2b453eb73dfe0b8cf516dfb4d568c0b858d1fa
DIFF: https://github.com/llvm/llvm-project/commit/eb2b453eb73dfe0b8cf516dfb4d568c0b858d1fa.diff

LOG: [VectorCombine] foldInsExtVectorToShuffle - ensure we call getShuffleCost with the input operand type, not the result

Fixes a typo introduced in #121216: the expand/narrow shuffle cost was queried with DstVecTy instead of SrcVecTy.

Fixes #126085

Added: 
    llvm/test/Transforms/VectorCombine/X86/pr126085.ll

Modified: 
    llvm/lib/Transforms/Vectorize/VectorCombine.cpp
    llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
    llvm/test/Transforms/VectorCombine/X86/extract-insert.ll
    llvm/test/Transforms/VectorCombine/X86/load.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 4f46b621c8a70c8..746742e14d080e6 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -3210,7 +3210,7 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
       ExtToVecMask[0] = ExtIdx;
     // Add cost for expanding or narrowing
     NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
-                                 DstVecTy, ExtToVecMask, CostKind);
+                                 SrcVecTy, ExtToVecMask, CostKind);
     NewCost += TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind);
   }
 

diff  --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
index 0a9386c0b8db1de..642d07a8f325371 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
@@ -58,15 +58,10 @@ define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b)
 }
 
 define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; SSE-LABEL: @src_ins0_v4f64_ext1_v2f64(
-; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
-; SSE-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
-; SSE-NEXT:    ret <4 x double> [[INS]]
-;
-; AVX-LABEL: @src_ins0_v4f64_ext1_v2f64(
-; AVX-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; AVX-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0
-; AVX-NEXT:    ret <4 x double> [[INS]]
+; CHECK-LABEL: @src_ins0_v4f64_ext1_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <4 x double> [[INS]]
 ;
   %ext = extractelement <2 x double> %b, i32 1
   %ins = insertelement <4 x double> poison, double %ext, i32 0
@@ -85,10 +80,15 @@ define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
 }
 
 define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 poison>
-; CHECK-NEXT:    ret <4 x double> [[INS]]
+; SSE-LABEL: @src_ins2_v4f64_ext1_v2f64(
+; SSE-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; SSE-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
+; SSE-NEXT:    ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins2_v4f64_ext1_v2f64(
+; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
+; AVX-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 poison>
+; AVX-NEXT:    ret <4 x double> [[INS]]
 ;
   %ext = extractelement <2 x double> %b, i32 1
   %ins = insertelement <4 x double> poison, double %ext, i32 2
@@ -119,8 +119,8 @@ define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
 
 define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 1>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
 ; CHECK-NEXT:    ret <2 x double> [[INS]]
 ;
   %ext = extractelement <4 x double> %b, i32 1
@@ -152,8 +152,8 @@ define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b)
 
 define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
 ; CHECK-NEXT:    ret <2 x double> [[INS]]
 ;
   %ext = extractelement <4 x double> %b, i32 0
@@ -173,10 +173,15 @@ define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b)
 }
 
 define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
-; CHECK-LABEL: @src_ins1_v2f64_ext2_v4f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
-; CHECK-NEXT:    ret <2 x double> [[INS]]
+; SSE-LABEL: @src_ins1_v2f64_ext2_v4f64(
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 2, i32 poison>
+; SSE-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
+; SSE-NEXT:    ret <2 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins1_v2f64_ext2_v4f64(
+; AVX-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
+; AVX-NEXT:    [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
+; AVX-NEXT:    ret <2 x double> [[INS]]
 ;
   %ext = extractelement <4 x double> %b, i32 2
   %ins = insertelement <2 x double> poison, double %ext, i32 1

diff  --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll
index 41200e517f5ed25..193ad36616a4a52 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll
@@ -98,8 +98,8 @@ define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
 
 define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @src_ins0_v2f64_ext0_v4f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    ret <2 x double> [[INS]]
 ;
   %ext = extractelement <4 x double> %b, i32 0
@@ -119,10 +119,15 @@ define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b)
 }
 
 define <2 x double> @src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
-; CHECK-LABEL: @src_ins0_v2f64_ext2_v4f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0
-; CHECK-NEXT:    ret <2 x double> [[INS]]
+; SSE-LABEL: @src_ins0_v2f64_ext2_v4f64(
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 2, i32 poison>
+; SSE-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 2, i32 1>
+; SSE-NEXT:    ret <2 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins0_v2f64_ext2_v4f64(
+; AVX-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
+; AVX-NEXT:    [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0
+; AVX-NEXT:    ret <2 x double> [[INS]]
 ;
   %ext = extractelement <4 x double> %b, i32 2
   %ins = insertelement <2 x double> %a, double %ext, i32 0
@@ -142,8 +147,8 @@ define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b)
 
 define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    ret <2 x double> [[INS]]
 ;
   %ext = extractelement <4 x double> %b, i32 0
@@ -163,10 +168,15 @@ define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b)
 }
 
 define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
-; CHECK-LABEL: @src_ins1_v2f64_ext2_v4f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1
-; CHECK-NEXT:    ret <2 x double> [[INS]]
+; SSE-LABEL: @src_ins1_v2f64_ext2_v4f64(
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 2, i32 poison>
+; SSE-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
+; SSE-NEXT:    ret <2 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins1_v2f64_ext2_v4f64(
+; AVX-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
+; AVX-NEXT:    [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1
+; AVX-NEXT:    ret <2 x double> [[INS]]
 ;
   %ext = extractelement <4 x double> %b, i32 2
   %ins = insertelement <2 x double> %a, double %ext, i32 1

diff  --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll
index b12104c5c673eaf..84c223be8862155 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load.ll
@@ -574,8 +574,8 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(ptr align 16 derefer
 ; CHECK-LABEL: @load_v1i32_extract_insert_v8i32_extra_use(
 ; CHECK-NEXT:    [[L:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    store <1 x i32> [[L]], ptr [[STORE_PTR:%.*]], align 4
-; CHECK-NEXT:    [[S:%.*]] = extractelement <1 x i32> [[L]], i32 0
-; CHECK-NEXT:    [[R:%.*]] = insertelement <8 x i32> undef, i32 [[S]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <1 x i32> [[L]], <1 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:    ret <8 x i32> [[R]]
 ;
   %l = load <1 x i32>, ptr %p, align 4

diff  --git a/llvm/test/Transforms/VectorCombine/X86/pr126085.ll b/llvm/test/Transforms/VectorCombine/X86/pr126085.ll
new file mode 100644
index 000000000000000..f596807027db6c8
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/X86/pr126085.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- | FileCheck %s
+
+define i32 @test(ptr %a0) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: ptr [[A0:%.*]]) {
+; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[A0]], align 1
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <16 x i8> [[LOAD]], <16 x i8> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <16 x i8> [[LOAD]], i64 11
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x i8> [[SHUF]], i8 [[ELT]], i64 1
+; CHECK-NEXT:    [[RES:%.*]] = bitcast <4 x i8> [[INS]] to i32
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %load = load <16 x i8>, ptr %a0, align 1
+  %shuf = shufflevector <16 x i8> %load, <16 x i8> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+  %elt = extractelement <16 x i8> %load, i64 11
+  %ins = insertelement <4 x i8> %shuf, i8 %elt, i64 1
+  %res = bitcast <4 x i8> %ins to i32
+  ret i32 %res
+}


        


More information about the llvm-commits mailing list