[llvm] [VectorCombine] foldShuffleOfBinops - support multiple uses of shuffled binops (PR #179429)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 3 02:56:59 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Julian Pokrovsky (raventid)
<details>
<summary>Changes</summary>
Resolves https://github.com/llvm/llvm-project/issues/173035
---
Full diff: https://github.com/llvm/llvm-project/pull/179429.diff
4 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VectorCombine.cpp (+9-2)
- (modified) llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll (+2-1)
- (modified) llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll (+52)
- (modified) llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll (+28-9)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index b3295576eb73e..6ccd1b2a3b7c7 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2543,8 +2543,8 @@ bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
ArrayRef<int> OldMask;
Instruction *LHS, *RHS;
- if (!match(&I, m_Shuffle(m_OneUse(m_Instruction(LHS)),
- m_OneUse(m_Instruction(RHS)), m_Mask(OldMask))))
+ if (!match(&I, m_Shuffle(m_Instruction(LHS), m_Instruction(RHS),
+ m_Mask(OldMask))))
return false;
// TODO: Add support for addlike etc.
@@ -2661,6 +2661,13 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
PredLHS, CostKind, Op0Info, Op1Info);
}
+ // If LHS/RHS have other uses, we need to account for the cost of keeping
+ // the original instructions.
+ if (!LHS->hasOneUse())
+ NewCost += TTI.getInstructionCost(LHS, CostKind);
+ if (!RHS->hasOneUse())
+ NewCost += TTI.getInstructionCost(RHS, CostKind);
+
LLVM_DEBUG(dbgs() << "Found a shuffle feeding two binops: " << I
<< "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
<< "\n");
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll b/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll
index 3a3ba74663b93..367240e30d243 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll
@@ -408,7 +408,8 @@ define void @test_31(ptr %src, ptr %dst) {
; CHECK-NEXT: [[T0:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> [[ADD]], <4 x i32> <i32 0, i32 2, i32 5, i32 2>
; CHECK-NEXT: [[H0:%.*]] = getelementptr <4 x i32>, ptr [[DST:%.*]], i32 0
; CHECK-NEXT: store <4 x i32> [[T0]], ptr [[H0]], align 16
-; CHECK-NEXT: [[T1:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> [[ADD]], <4 x i32> <i32 6, i32 0, i32 1, i32 7>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[S1]], <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
+; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[TMP1]], [[TMP1]]
; CHECK-NEXT: [[H1:%.*]] = getelementptr <4 x i32>, ptr [[DST]], i32 1
; CHECK-NEXT: store <4 x i32> [[T1]], ptr [[H1]], align 16
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll
index 0cceab506bb75..e1e9501968043 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll
@@ -197,6 +197,58 @@ define <4 x i32> @shuf_mul_v4i32_yy_use2(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
ret <4 x i32> %r
}
+; Multi-use tests
+
+define <4 x i32> @shuf_sdiv_v4i32_multiuse_lhs(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @shuf_sdiv_v4i32_multiuse_lhs(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[B0:%.*]] = sdiv <4 x i32> [[X]], [[Y]]
+; CHECK-NEXT: call void @use(<4 x i32> [[B0]])
+; CHECK-NEXT: [[B1:%.*]] = sdiv <4 x i32> [[Z]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %b0 = sdiv <4 x i32> %x, %y
+ call void @use(<4 x i32> %b0)
+ %b1 = sdiv <4 x i32> %z, %y
+ %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @shuf_sdiv_v4i32_multiuse_rhs(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @shuf_sdiv_v4i32_multiuse_rhs(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[B0:%.*]] = sdiv <4 x i32> [[X]], [[Y]]
+; CHECK-NEXT: [[B1:%.*]] = sdiv <4 x i32> [[Z]], [[Y]]
+; CHECK-NEXT: call void @use(<4 x i32> [[B1]])
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %b0 = sdiv <4 x i32> %x, %y
+ %b1 = sdiv <4 x i32> %z, %y
+ call void @use(<4 x i32> %b1)
+ %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @shuf_sdiv_v4i32_multiuse_both(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @shuf_sdiv_v4i32_multiuse_both(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[B0:%.*]] = sdiv <4 x i32> [[X]], [[Y]]
+; CHECK-NEXT: [[B1:%.*]] = sdiv <4 x i32> [[Z]], [[Y]]
+; CHECK-NEXT: call void @use(<4 x i32> [[B0]])
+; CHECK-NEXT: call void @use(<4 x i32> [[B1]])
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %b0 = sdiv <4 x i32> %x, %y
+ %b1 = sdiv <4 x i32> %z, %y
+ call void @use(<4 x i32> %b0)
+ call void @use(<4 x i32> %b1)
+ %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ ret <4 x i32> %r
+}
+
; non-matching operands (not commutable)
define <4 x float> @shuf_fdiv_v4f32_no_common_op(<4 x float> %x, <4 x float> %y, <4 x float> %z, <4 x float> %w) {
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll
index f9108efa7ee79..c044764b0ede9 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll
@@ -248,17 +248,36 @@ define <4 x i32> @shuf_icmp_eq_v4i64_v4i32_mismatch_type(<4 x i64> %x, <4 x i64>
ret <4 x i32> %r
}
-; negative test - uses
+; Multi-use test - fold behaviour depends on enabled extensions
define <4 x i32> @shuf_icmp_ugt_v4i32_use(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) {
-; CHECK-LABEL: define <4 x i32> @shuf_icmp_ugt_v4i32_use(
-; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[C0:%.*]] = icmp ugt <4 x i32> [[X]], [[Y]]
-; CHECK-NEXT: [[C1:%.*]] = icmp ugt <4 x i32> [[Z]], [[W]]
-; CHECK-NEXT: call void @use(<4 x i1> [[C0]])
-; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
-; CHECK-NEXT: ret <4 x i32> [[R]]
+; SSE-LABEL: define <4 x i32> @shuf_icmp_ugt_v4i32_use(
+; SSE-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[C0:%.*]] = icmp ugt <4 x i32> [[X]], [[Y]]
+; SSE-NEXT: [[C1:%.*]] = icmp ugt <4 x i32> [[Z]], [[W]]
+; SSE-NEXT: call void @use(<4 x i1> [[C0]])
+; SSE-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; SSE-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
+; SSE-NEXT: ret <4 x i32> [[R]]
+;
+; AVX2-LABEL: define <4 x i32> @shuf_icmp_ugt_v4i32_use(
+; AVX2-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[C0:%.*]] = icmp ugt <4 x i32> [[X]], [[Y]]
+; AVX2-NEXT: [[C1:%.*]] = icmp ugt <4 x i32> [[Z]], [[W]]
+; AVX2-NEXT: call void @use(<4 x i1> [[C0]])
+; AVX2-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; AVX2-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
+; AVX2-NEXT: ret <4 x i32> [[R]]
+;
+; AVX512-LABEL: define <4 x i32> @shuf_icmp_ugt_v4i32_use(
+; AVX512-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[C0:%.*]] = icmp ugt <4 x i32> [[X]], [[Y]]
+; AVX512-NEXT: call void @use(<4 x i1> [[C0]])
+; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Z]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[W]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; AVX512-NEXT: [[S:%.*]] = icmp ugt <4 x i32> [[TMP1]], [[TMP2]]
+; AVX512-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
+; AVX512-NEXT: ret <4 x i32> [[R]]
;
%c0 = icmp ugt <4 x i32> %x, %y
%c1 = icmp ugt <4 x i32> %z, %w
``````````
</details>
https://github.com/llvm/llvm-project/pull/179429
More information about the llvm-commits
mailing list