[llvm] [VectorCombine] foldShuffleOfBinops - if both operands are the same don't duplicate the total new cost (PR #172719)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 17 11:08:13 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
If we're shuffling/concatenating the same operands on both sides of the binop (X == Y, Z == W, and the two new shuffle masks are identical), then count the new shuffle cost only once, reuse the single new shuffle for both binop operands, and recognise that the fold reduces the total instruction count (so we fold even when NewCost == OldCost, not just when NewCost < OldCost).
---
Full diff: https://github.com/llvm/llvm-project/pull/172719.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VectorCombine.cpp (+9-6)
- (modified) llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll (+6-14)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 695e97dec24aa..9239cb1b989b2 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2491,14 +2491,16 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
ReducedInstCount |= MergeInner(Y, 0, NewMask1, CostKind);
ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0, CostKind);
ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1, CostKind);
+ bool SingleSrcBinOp = (X == Y) && (Z == W) && (NewMask0 == NewMask1);
+ ReducedInstCount |= SingleSrcBinOp;
auto *ShuffleCmpTy =
FixedVectorType::get(BinOpTy->getElementType(), ShuffleDstTy);
- InstructionCost NewCost =
- TTI.getShuffleCost(SK0, ShuffleCmpTy, BinOpTy, NewMask0, CostKind, 0,
- nullptr, {X, Z}) +
- TTI.getShuffleCost(SK1, ShuffleCmpTy, BinOpTy, NewMask1, CostKind, 0,
- nullptr, {Y, W});
+ InstructionCost NewCost = TTI.getShuffleCost(
+ SK0, ShuffleCmpTy, BinOpTy, NewMask0, CostKind, 0, nullptr, {X, Z});
+ if (!SingleSrcBinOp)
+ NewCost += TTI.getShuffleCost(SK1, ShuffleCmpTy, BinOpTy, NewMask1,
+ CostKind, 0, nullptr, {Y, W});
if (PredLHS == CmpInst::BAD_ICMP_PREDICATE) {
NewCost +=
@@ -2520,7 +2522,8 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
return false;
Value *Shuf0 = Builder.CreateShuffleVector(X, Z, NewMask0);
- Value *Shuf1 = Builder.CreateShuffleVector(Y, W, NewMask1);
+ Value *Shuf1 =
+ SingleSrcBinOp ? Shuf0 : Builder.CreateShuffleVector(Y, W, NewMask1);
Value *NewBO = PredLHS == CmpInst::BAD_ICMP_PREDICATE
? Builder.CreateBinOp(
cast<BinaryOperator>(LHS)->getOpcode(), Shuf0, Shuf1)
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll
index 8c66684dfe137..40f55effcc881 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll
@@ -59,22 +59,14 @@ define <4 x float> @shuf_fmul_v4f32_xx_swap(<4 x float> %x, <4 x float> %y, <4 x
ret <4 x float> %r
}
-; FIXME: For repeated ops, don't repeat the shuffle cost.
+; For repeated ops, don't repeat the shuffle cost.
define <8 x float> @shuf_fmul_v4f32_self_mul(<4 x float> %a0, <4 x float> %a1) {
-; SSE-LABEL: define <8 x float> @shuf_fmul_v4f32_self_mul(
-; SSE-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
-; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SSE-NEXT: [[R:%.*]] = fmul <8 x float> [[TMP1]], [[TMP2]]
-; SSE-NEXT: ret <8 x float> [[R]]
-;
-; AVX-LABEL: define <8 x float> @shuf_fmul_v4f32_self_mul(
-; AVX-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
-; AVX-NEXT: [[M0:%.*]] = fmul <4 x float> [[A0]], [[A0]]
-; AVX-NEXT: [[M1:%.*]] = fmul <4 x float> [[A1]], [[A1]]
-; AVX-NEXT: [[R:%.*]] = shufflevector <4 x float> [[M0]], <4 x float> [[M1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; AVX-NEXT: ret <8 x float> [[R]]
+; CHECK-LABEL: define <8 x float> @shuf_fmul_v4f32_self_mul(
+; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[R:%.*]] = fmul <8 x float> [[TMP1]], [[TMP1]]
+; CHECK-NEXT: ret <8 x float> [[R]]
;
%m0 = fmul <4 x float> %a0, %a0
%m1 = fmul <4 x float> %a1, %a1
``````````
</details>
https://github.com/llvm/llvm-project/pull/172719
More information about the llvm-commits mailing list