[llvm] eb2b453 - [VectorCombine] foldInsExtVectorToShuffle - ensure we call getShuffleCost with the input operand type, not the result
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 6 09:43:31 PST 2025
Author: Simon Pilgrim
Date: 2025-02-06T17:41:24Z
New Revision: eb2b453eb73dfe0b8cf516dfb4d568c0b858d1fa
URL: https://github.com/llvm/llvm-project/commit/eb2b453eb73dfe0b8cf516dfb4d568c0b858d1fa
DIFF: https://github.com/llvm/llvm-project/commit/eb2b453eb73dfe0b8cf516dfb4d568c0b858d1fa.diff
LOG: [VectorCombine] foldInsExtVectorToShuffle - ensure we call getShuffleCost with the input operand type, not the result
Typo in #121216
Fixes #126085
Added:
llvm/test/Transforms/VectorCombine/X86/pr126085.ll
Modified:
llvm/lib/Transforms/Vectorize/VectorCombine.cpp
llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
llvm/test/Transforms/VectorCombine/X86/extract-insert.ll
llvm/test/Transforms/VectorCombine/X86/load.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 4f46b621c8a70c8..746742e14d080e6 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -3210,7 +3210,7 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
ExtToVecMask[0] = ExtIdx;
// Add cost for expanding or narrowing
NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
- DstVecTy, ExtToVecMask, CostKind);
+ SrcVecTy, ExtToVecMask, CostKind);
NewCost += TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind);
}
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
index 0a9386c0b8db1de..642d07a8f325371 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
@@ -58,15 +58,10 @@ define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b)
}
define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; SSE-LABEL: @src_ins0_v4f64_ext1_v2f64(
-; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
-; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
-; SSE-NEXT: ret <4 x double> [[INS]]
-;
-; AVX-LABEL: @src_ins0_v4f64_ext1_v2f64(
-; AVX-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; AVX-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0
-; AVX-NEXT: ret <4 x double> [[INS]]
+; CHECK-LABEL: @src_ins0_v4f64_ext1_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT: ret <4 x double> [[INS]]
;
%ext = extractelement <2 x double> %b, i32 1
%ins = insertelement <4 x double> poison, double %ext, i32 0
@@ -85,10 +80,15 @@ define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
}
define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 poison>
-; CHECK-NEXT: ret <4 x double> [[INS]]
+; SSE-LABEL: @src_ins2_v4f64_ext1_v2f64(
+; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
+; SSE-NEXT: ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins2_v4f64_ext1_v2f64(
+; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
+; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 poison>
+; AVX-NEXT: ret <4 x double> [[INS]]
;
%ext = extractelement <2 x double> %b, i32 1
%ins = insertelement <4 x double> poison, double %ext, i32 2
@@ -119,8 +119,8 @@ define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 1>
+; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
; CHECK-NEXT: ret <2 x double> [[INS]]
;
%ext = extractelement <4 x double> %b, i32 1
@@ -152,8 +152,8 @@ define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b)
define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
; CHECK-NEXT: ret <2 x double> [[INS]]
;
%ext = extractelement <4 x double> %b, i32 0
@@ -173,10 +173,15 @@ define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b)
}
define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
-; CHECK-LABEL: @src_ins1_v2f64_ext2_v4f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
-; CHECK-NEXT: ret <2 x double> [[INS]]
+; SSE-LABEL: @src_ins1_v2f64_ext2_v4f64(
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 2, i32 poison>
+; SSE-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
+; SSE-NEXT: ret <2 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins1_v2f64_ext2_v4f64(
+; AVX-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
+; AVX-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
+; AVX-NEXT: ret <2 x double> [[INS]]
;
%ext = extractelement <4 x double> %b, i32 2
%ins = insertelement <2 x double> poison, double %ext, i32 1
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll
index 41200e517f5ed25..193ad36616a4a52 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll
@@ -98,8 +98,8 @@ define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @src_ins0_v2f64_ext0_v4f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: ret <2 x double> [[INS]]
;
%ext = extractelement <4 x double> %b, i32 0
@@ -119,10 +119,15 @@ define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b)
}
define <2 x double> @src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
-; CHECK-LABEL: @src_ins0_v2f64_ext2_v4f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0
-; CHECK-NEXT: ret <2 x double> [[INS]]
+; SSE-LABEL: @src_ins0_v2f64_ext2_v4f64(
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 2, i32 poison>
+; SSE-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 2, i32 1>
+; SSE-NEXT: ret <2 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins0_v2f64_ext2_v4f64(
+; AVX-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
+; AVX-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0
+; AVX-NEXT: ret <2 x double> [[INS]]
;
%ext = extractelement <4 x double> %b, i32 2
%ins = insertelement <2 x double> %a, double %ext, i32 0
@@ -142,8 +147,8 @@ define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b)
define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: ret <2 x double> [[INS]]
;
%ext = extractelement <4 x double> %b, i32 0
@@ -163,10 +168,15 @@ define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b)
}
define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
-; CHECK-LABEL: @src_ins1_v2f64_ext2_v4f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1
-; CHECK-NEXT: ret <2 x double> [[INS]]
+; SSE-LABEL: @src_ins1_v2f64_ext2_v4f64(
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 2, i32 poison>
+; SSE-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
+; SSE-NEXT: ret <2 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins1_v2f64_ext2_v4f64(
+; AVX-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
+; AVX-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1
+; AVX-NEXT: ret <2 x double> [[INS]]
;
%ext = extractelement <4 x double> %b, i32 2
%ins = insertelement <2 x double> %a, double %ext, i32 1
diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll
index b12104c5c673eaf..84c223be8862155 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load.ll
@@ -574,8 +574,8 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(ptr align 16 derefer
; CHECK-LABEL: @load_v1i32_extract_insert_v8i32_extra_use(
; CHECK-NEXT: [[L:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 4
; CHECK-NEXT: store <1 x i32> [[L]], ptr [[STORE_PTR:%.*]], align 4
-; CHECK-NEXT: [[S:%.*]] = extractelement <1 x i32> [[L]], i32 0
-; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i32> undef, i32 [[S]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <1 x i32> [[L]], <1 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x i32> [[R]]
;
%l = load <1 x i32>, ptr %p, align 4
diff --git a/llvm/test/Transforms/VectorCombine/X86/pr126085.ll b/llvm/test/Transforms/VectorCombine/X86/pr126085.ll
new file mode 100644
index 000000000000000..f596807027db6c8
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/X86/pr126085.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- | FileCheck %s
+
+define i32 @test(ptr %a0) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: ptr [[A0:%.*]]) {
+; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[A0]], align 1
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <16 x i8> [[LOAD]], <16 x i8> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[ELT:%.*]] = extractelement <16 x i8> [[LOAD]], i64 11
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x i8> [[SHUF]], i8 [[ELT]], i64 1
+; CHECK-NEXT: [[RES:%.*]] = bitcast <4 x i8> [[INS]] to i32
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %load = load <16 x i8>, ptr %a0, align 1
+ %shuf = shufflevector <16 x i8> %load, <16 x i8> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+ %elt = extractelement <16 x i8> %load, i64 11
+ %ins = insertelement <4 x i8> %shuf, i8 %elt, i64 1
+ %res = bitcast <4 x i8> %ins to i32
+ ret i32 %res
+}
More information about the llvm-commits
mailing list