[llvm] [VectorCombine] foldShuffleOfBinops - support multiple uses of shuffled binops (PR #179429)

via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 3 02:56:59 PST 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Julian Pokrovsky (raventid)

<details>
<summary>Changes</summary>

Resolves https://github.com/llvm/llvm-project/issues/173035

---
Full diff: https://github.com/llvm/llvm-project/pull/179429.diff


4 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/VectorCombine.cpp (+9-2) 
- (modified) llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll (+2-1) 
- (modified) llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll (+52) 
- (modified) llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll (+28-9) 


``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index b3295576eb73e..6ccd1b2a3b7c7 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2543,8 +2543,8 @@ bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
 bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
   ArrayRef<int> OldMask;
   Instruction *LHS, *RHS;
-  if (!match(&I, m_Shuffle(m_OneUse(m_Instruction(LHS)),
-                           m_OneUse(m_Instruction(RHS)), m_Mask(OldMask))))
+  if (!match(&I, m_Shuffle(m_Instruction(LHS), m_Instruction(RHS),
+                           m_Mask(OldMask))))
     return false;
 
   // TODO: Add support for addlike etc.
@@ -2661,6 +2661,13 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
                                PredLHS, CostKind, Op0Info, Op1Info);
   }
 
+  // If LHS/RHS have other uses, we need to account for the cost of keeping
+  // the original instructions.
+  if (!LHS->hasOneUse())
+    NewCost += TTI.getInstructionCost(LHS, CostKind);
+  if (!RHS->hasOneUse())
+    NewCost += TTI.getInstructionCost(RHS, CostKind);
+
   LLVM_DEBUG(dbgs() << "Found a shuffle feeding two binops: " << I
                     << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                     << "\n");
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll b/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll
index 3a3ba74663b93..367240e30d243 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll
@@ -408,7 +408,8 @@ define void @test_31(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    [[T0:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> [[ADD]], <4 x i32> <i32 0, i32 2, i32 5, i32 2>
 ; CHECK-NEXT:    [[H0:%.*]] = getelementptr <4 x i32>, ptr [[DST:%.*]], i32 0
 ; CHECK-NEXT:    store <4 x i32> [[T0]], ptr [[H0]], align 16
-; CHECK-NEXT:    [[T1:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> [[ADD]], <4 x i32> <i32 6, i32 0, i32 1, i32 7>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[S1]], <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    [[H1:%.*]] = getelementptr <4 x i32>, ptr [[DST]], i32 1
 ; CHECK-NEXT:    store <4 x i32> [[T1]], ptr [[H1]], align 16
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll
index 0cceab506bb75..e1e9501968043 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll
@@ -197,6 +197,58 @@ define <4 x i32> @shuf_mul_v4i32_yy_use2(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
   ret <4 x i32> %r
 }
 
+; Multi-use tests
+
+define <4 x i32> @shuf_sdiv_v4i32_multiuse_lhs(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @shuf_sdiv_v4i32_multiuse_lhs(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[B0:%.*]] = sdiv <4 x i32> [[X]], [[Y]]
+; CHECK-NEXT:    call void @use(<4 x i32> [[B0]])
+; CHECK-NEXT:    [[B1:%.*]] = sdiv <4 x i32> [[Z]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    ret <4 x i32> [[R]]
+;
+  %b0 = sdiv <4 x i32> %x, %y
+  call void @use(<4 x i32> %b0)
+  %b1 = sdiv <4 x i32> %z, %y
+  %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @shuf_sdiv_v4i32_multiuse_rhs(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @shuf_sdiv_v4i32_multiuse_rhs(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[B0:%.*]] = sdiv <4 x i32> [[X]], [[Y]]
+; CHECK-NEXT:    [[B1:%.*]] = sdiv <4 x i32> [[Z]], [[Y]]
+; CHECK-NEXT:    call void @use(<4 x i32> [[B1]])
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    ret <4 x i32> [[R]]
+;
+  %b0 = sdiv <4 x i32> %x, %y
+  %b1 = sdiv <4 x i32> %z, %y
+  call void @use(<4 x i32> %b1)
+  %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @shuf_sdiv_v4i32_multiuse_both(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @shuf_sdiv_v4i32_multiuse_both(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[B0:%.*]] = sdiv <4 x i32> [[X]], [[Y]]
+; CHECK-NEXT:    [[B1:%.*]] = sdiv <4 x i32> [[Z]], [[Y]]
+; CHECK-NEXT:    call void @use(<4 x i32> [[B0]])
+; CHECK-NEXT:    call void @use(<4 x i32> [[B1]])
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    ret <4 x i32> [[R]]
+;
+  %b0 = sdiv <4 x i32> %x, %y
+  %b1 = sdiv <4 x i32> %z, %y
+  call void @use(<4 x i32> %b0)
+  call void @use(<4 x i32> %b1)
+  %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i32> %r
+}
+
 ; non-matching operands (not commutable)
 
 define <4 x float> @shuf_fdiv_v4f32_no_common_op(<4 x float> %x, <4 x float> %y, <4 x float> %z, <4 x float> %w) {
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll
index f9108efa7ee79..c044764b0ede9 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll
@@ -248,17 +248,36 @@ define <4 x i32> @shuf_icmp_eq_v4i64_v4i32_mismatch_type(<4 x i64> %x, <4 x i64>
   ret <4 x i32> %r
 }
 
-; negative test - uses
+; Multi-use test - fold behaviour depends on enabled extensions
 
 define <4 x i32> @shuf_icmp_ugt_v4i32_use(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) {
-; CHECK-LABEL: define <4 x i32> @shuf_icmp_ugt_v4i32_use(
-; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[C0:%.*]] = icmp ugt <4 x i32> [[X]], [[Y]]
-; CHECK-NEXT:    [[C1:%.*]] = icmp ugt <4 x i32> [[Z]], [[W]]
-; CHECK-NEXT:    call void @use(<4 x i1> [[C0]])
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
-; CHECK-NEXT:    ret <4 x i32> [[R]]
+; SSE-LABEL: define <4 x i32> @shuf_icmp_ugt_v4i32_use(
+; SSE-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[C0:%.*]] = icmp ugt <4 x i32> [[X]], [[Y]]
+; SSE-NEXT:    [[C1:%.*]] = icmp ugt <4 x i32> [[Z]], [[W]]
+; SSE-NEXT:    call void @use(<4 x i1> [[C0]])
+; SSE-NEXT:    [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; SSE-NEXT:    [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
+; SSE-NEXT:    ret <4 x i32> [[R]]
+;
+; AVX2-LABEL: define <4 x i32> @shuf_icmp_ugt_v4i32_use(
+; AVX2-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[C0:%.*]] = icmp ugt <4 x i32> [[X]], [[Y]]
+; AVX2-NEXT:    [[C1:%.*]] = icmp ugt <4 x i32> [[Z]], [[W]]
+; AVX2-NEXT:    call void @use(<4 x i1> [[C0]])
+; AVX2-NEXT:    [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; AVX2-NEXT:    [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
+; AVX2-NEXT:    ret <4 x i32> [[R]]
+;
+; AVX512-LABEL: define <4 x i32> @shuf_icmp_ugt_v4i32_use(
+; AVX512-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[C0:%.*]] = icmp ugt <4 x i32> [[X]], [[Y]]
+; AVX512-NEXT:    call void @use(<4 x i1> [[C0]])
+; AVX512-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Z]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; AVX512-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[W]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; AVX512-NEXT:    [[S:%.*]] = icmp ugt <4 x i32> [[TMP1]], [[TMP2]]
+; AVX512-NEXT:    [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
+; AVX512-NEXT:    ret <4 x i32> [[R]]
 ;
   %c0 = icmp ugt <4 x i32> %x, %y
   %c1 = icmp ugt <4 x i32> %z, %w

``````````

</details>


https://github.com/llvm/llvm-project/pull/179429


More information about the llvm-commits mailing list