[llvm] [VectorCombine] foldShuffleOfShuffles - fold shuffle(shuffle(x,y),poison) length changing masks (PR #144690)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 18 05:51:25 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
The shuffle merging code assumes that the shuffle sources are all the same type, which fails if we've changed length and don't have 2 inner shuffles.
This patch creates a fake "all poison" shuffle mask and reuses the other shuffle's sources, which can be safely used with the existing merge code.
The alternative was a considerable refactor of the merge code to account for different vector widths......
Fixes #<!-- -->144656
---
Full diff: https://github.com/llvm/llvm-project/pull/144690.diff
4 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VectorCombine.cpp (+13-2)
- (modified) llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll (+2-4)
- (modified) llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll (+16-20)
- (modified) llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll (+1-2)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 52cb1dbb33b86..9a63a0ceff884 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2275,6 +2275,17 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
if (!Match0 && !Match1)
return false;
+ // If the outer shuffle is a permute, then create a fake inner all-poison
+ // shuffle. This is easier than accounting for length-changing shuffles below.
+ SmallVector<int, 16> PoisonMask1;
+ if (Match0 && !Match1 && isa<PoisonValue>(OuterV1)) {
+ X1 = X0;
+ Y1 = Y0;
+ PoisonMask1.append(InnerMask0.size(), PoisonMaskElem);
+ InnerMask1 = PoisonMask1;
+ Match1 = true; // fake match
+ }
+
X0 = Match0 ? X0 : OuterV0;
Y0 = Match0 ? Y0 : OuterV0;
X1 = Match1 ? X1 : OuterV1;
@@ -2349,11 +2360,11 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
// Try to merge the shuffles if the new shuffle is not costly.
InstructionCost InnerCost0 = 0;
if (Match0)
- InnerCost0 = TTI.getInstructionCost(cast<Instruction>(OuterV0), CostKind);
+ InnerCost0 = TTI.getInstructionCost(cast<User>(OuterV0), CostKind);
InstructionCost InnerCost1 = 0;
if (Match1)
- InnerCost1 = TTI.getInstructionCost(cast<Instruction>(OuterV1), CostKind);
+ InnerCost1 = TTI.getInstructionCost(cast<User>(OuterV1), CostKind);
InstructionCost OuterCost = TTI.getInstructionCost(&I, CostKind);
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
index 1c9bc77ac3bef..1c128c8f56a03 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
@@ -262,10 +262,8 @@ define <8 x half> @splatandidentity(<8 x half> %a, <8 x half> %b) {
define <8 x half> @splattwice(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: @splattwice(
-; CHECK-NEXT: [[AS:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[BS:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[AS]], <4 x half> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[BS]], <4 x half> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <8 x half> [[R]]
;
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
index 642d07a8f3253..e85c092b1b213 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
@@ -32,8 +32,7 @@ define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b)
; SSE-NEXT: ret <4 x double> [[INS]]
;
; AVX-LABEL: @src_ins2_v4f64_ext0_v2f64(
-; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>
+; AVX-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>
; AVX-NEXT: ret <4 x double> [[INS]]
;
%ext = extractelement <2 x double> %b, i32 0
@@ -48,8 +47,7 @@ define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b)
; SSE-NEXT: ret <4 x double> [[INS]]
;
; AVX-LABEL: @src_ins3_v4f64_ext0_v2f64(
-; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>
+; AVX-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>
; AVX-NEXT: ret <4 x double> [[INS]]
;
%ext = extractelement <2 x double> %b, i32 0
@@ -86,8 +84,7 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
; SSE-NEXT: ret <4 x double> [[INS]]
;
; AVX-LABEL: @src_ins2_v4f64_ext1_v2f64(
-; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
-; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 poison>
+; AVX-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 poison>
; AVX-NEXT: ret <4 x double> [[INS]]
;
%ext = extractelement <2 x double> %b, i32 1
@@ -96,10 +93,14 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
}
define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 1>
-; CHECK-NEXT: ret <4 x double> [[INS]]
+; SSE-LABEL: @src_ins3_v4f64_ext1_v2f64(
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
+; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 1>
+; SSE-NEXT: ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins3_v4f64_ext1_v2f64(
+; AVX-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 1>
+; AVX-NEXT: ret <4 x double> [[INS]]
;
%ext = extractelement <2 x double> %b, i32 1
%ins = insertelement <4 x double> poison, double %ext, i32 3
@@ -119,8 +120,7 @@ define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 1>
-; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
+; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 poison>
; CHECK-NEXT: ret <2 x double> [[INS]]
;
%ext = extractelement <4 x double> %b, i32 1
@@ -152,8 +152,7 @@ define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b)
define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 0, i32 poison>
-; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
+; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 0>
; CHECK-NEXT: ret <2 x double> [[INS]]
;
%ext = extractelement <4 x double> %b, i32 0
@@ -164,8 +163,7 @@ define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 1>
-; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 1>
-; CHECK-NEXT: ret <2 x double> [[INS]]
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%ext = extractelement <4 x double> %b, i32 1
%ins = insertelement <2 x double> poison, double %ext, i32 1
@@ -174,8 +172,7 @@ define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b)
define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
; SSE-LABEL: @src_ins1_v2f64_ext2_v4f64(
-; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 2, i32 poison>
-; SSE-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
+; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 2>
; SSE-NEXT: ret <2 x double> [[INS]]
;
; AVX-LABEL: @src_ins1_v2f64_ext2_v4f64(
@@ -190,8 +187,7 @@ define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b)
define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 poison>
-; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
+; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 3>
; CHECK-NEXT: ret <2 x double> [[INS]]
;
%ext = extractelement <4 x double> %b, i32 3
diff --git a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
index 73476308916fb..40437ca345224 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
@@ -578,8 +578,7 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(ptr align 16 derefer
; CHECK-NEXT: [[L:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 4
; CHECK-NEXT: store <1 x i32> [[L]], ptr [[STORE_PTR:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <1 x i32> [[L]], <1 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: ret <8 x i32> [[R]]
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
;
%l = load <1 x i32>, ptr %p, align 4
store <1 x i32> %l, ptr %store_ptr
``````````
</details>
https://github.com/llvm/llvm-project/pull/144690
More information about the llvm-commits
mailing list