[llvm] [VectorCombine] foldShuffleOfBinops - support multiple uses of shuffled binops (PR #179429)
Julian Pokrovsky via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 5 01:08:34 PST 2026
https://github.com/raventid updated https://github.com/llvm/llvm-project/pull/179429
>From 444b1fd782554dc84a2b1abd03352ce0abc79092 Mon Sep 17 00:00:00 2001
From: raventid <juliankul at gmail.com>
Date: Tue, 3 Feb 2026 18:50:53 +0800
Subject: [PATCH 1/4] [VectorCombine] foldShuffleOfBinops - support multiple
uses of shuffled binops
Resolves https://github.com/llvm/llvm-project/issues/173035
---
.../Transforms/Vectorize/VectorCombine.cpp | 11 +++-
.../VectorCombine/AArch64/select-shuffle.ll | 3 +-
.../VectorCombine/X86/shuffle-of-binops.ll | 52 +++++++++++++++++++
.../VectorCombine/X86/shuffle-of-cmps.ll | 37 +++++++++----
4 files changed, 91 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index b3295576eb73e..6ccd1b2a3b7c7 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2543,8 +2543,8 @@ bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
ArrayRef<int> OldMask;
Instruction *LHS, *RHS;
- if (!match(&I, m_Shuffle(m_OneUse(m_Instruction(LHS)),
- m_OneUse(m_Instruction(RHS)), m_Mask(OldMask))))
+ if (!match(&I, m_Shuffle(m_Instruction(LHS), m_Instruction(RHS),
+ m_Mask(OldMask))))
return false;
// TODO: Add support for addlike etc.
@@ -2661,6 +2661,13 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
PredLHS, CostKind, Op0Info, Op1Info);
}
+ // If LHS/RHS have other uses, we need to account for the cost of keeping
+ // the original instructions.
+ if (!LHS->hasOneUse())
+ NewCost += TTI.getInstructionCost(LHS, CostKind);
+ if (!RHS->hasOneUse())
+ NewCost += TTI.getInstructionCost(RHS, CostKind);
+
LLVM_DEBUG(dbgs() << "Found a shuffle feeding two binops: " << I
<< "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
<< "\n");
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll b/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll
index 3a3ba74663b93..367240e30d243 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll
@@ -408,7 +408,8 @@ define void @test_31(ptr %src, ptr %dst) {
; CHECK-NEXT: [[T0:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> [[ADD]], <4 x i32> <i32 0, i32 2, i32 5, i32 2>
; CHECK-NEXT: [[H0:%.*]] = getelementptr <4 x i32>, ptr [[DST:%.*]], i32 0
; CHECK-NEXT: store <4 x i32> [[T0]], ptr [[H0]], align 16
-; CHECK-NEXT: [[T1:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> [[ADD]], <4 x i32> <i32 6, i32 0, i32 1, i32 7>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[S1]], <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
+; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[TMP1]], [[TMP1]]
; CHECK-NEXT: [[H1:%.*]] = getelementptr <4 x i32>, ptr [[DST]], i32 1
; CHECK-NEXT: store <4 x i32> [[T1]], ptr [[H1]], align 16
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll
index 0cceab506bb75..e1e9501968043 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll
@@ -197,6 +197,58 @@ define <4 x i32> @shuf_mul_v4i32_yy_use2(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
ret <4 x i32> %r
}
+; Multi-use tests
+
+define <4 x i32> @shuf_sdiv_v4i32_multiuse_lhs(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @shuf_sdiv_v4i32_multiuse_lhs(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[B0:%.*]] = sdiv <4 x i32> [[X]], [[Y]]
+; CHECK-NEXT: call void @use(<4 x i32> [[B0]])
+; CHECK-NEXT: [[B1:%.*]] = sdiv <4 x i32> [[Z]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %b0 = sdiv <4 x i32> %x, %y
+ call void @use(<4 x i32> %b0)
+ %b1 = sdiv <4 x i32> %z, %y
+ %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @shuf_sdiv_v4i32_multiuse_rhs(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @shuf_sdiv_v4i32_multiuse_rhs(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[B0:%.*]] = sdiv <4 x i32> [[X]], [[Y]]
+; CHECK-NEXT: [[B1:%.*]] = sdiv <4 x i32> [[Z]], [[Y]]
+; CHECK-NEXT: call void @use(<4 x i32> [[B1]])
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %b0 = sdiv <4 x i32> %x, %y
+ %b1 = sdiv <4 x i32> %z, %y
+ call void @use(<4 x i32> %b1)
+ %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @shuf_sdiv_v4i32_multiuse_both(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @shuf_sdiv_v4i32_multiuse_both(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[B0:%.*]] = sdiv <4 x i32> [[X]], [[Y]]
+; CHECK-NEXT: [[B1:%.*]] = sdiv <4 x i32> [[Z]], [[Y]]
+; CHECK-NEXT: call void @use(<4 x i32> [[B0]])
+; CHECK-NEXT: call void @use(<4 x i32> [[B1]])
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %b0 = sdiv <4 x i32> %x, %y
+ %b1 = sdiv <4 x i32> %z, %y
+ call void @use(<4 x i32> %b0)
+ call void @use(<4 x i32> %b1)
+ %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ ret <4 x i32> %r
+}
+
; non-matching operands (not commutable)
define <4 x float> @shuf_fdiv_v4f32_no_common_op(<4 x float> %x, <4 x float> %y, <4 x float> %z, <4 x float> %w) {
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll
index f9108efa7ee79..c044764b0ede9 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll
@@ -248,17 +248,36 @@ define <4 x i32> @shuf_icmp_eq_v4i64_v4i32_mismatch_type(<4 x i64> %x, <4 x i64>
ret <4 x i32> %r
}
-; negative test - uses
+; Multi-use test - fold benaviour depends on enabled extensions
define <4 x i32> @shuf_icmp_ugt_v4i32_use(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) {
-; CHECK-LABEL: define <4 x i32> @shuf_icmp_ugt_v4i32_use(
-; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[C0:%.*]] = icmp ugt <4 x i32> [[X]], [[Y]]
-; CHECK-NEXT: [[C1:%.*]] = icmp ugt <4 x i32> [[Z]], [[W]]
-; CHECK-NEXT: call void @use(<4 x i1> [[C0]])
-; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
-; CHECK-NEXT: ret <4 x i32> [[R]]
+; SSE-LABEL: define <4 x i32> @shuf_icmp_ugt_v4i32_use(
+; SSE-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[C0:%.*]] = icmp ugt <4 x i32> [[X]], [[Y]]
+; SSE-NEXT: [[C1:%.*]] = icmp ugt <4 x i32> [[Z]], [[W]]
+; SSE-NEXT: call void @use(<4 x i1> [[C0]])
+; SSE-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; SSE-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
+; SSE-NEXT: ret <4 x i32> [[R]]
+;
+; AVX2-LABEL: define <4 x i32> @shuf_icmp_ugt_v4i32_use(
+; AVX2-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[C0:%.*]] = icmp ugt <4 x i32> [[X]], [[Y]]
+; AVX2-NEXT: [[C1:%.*]] = icmp ugt <4 x i32> [[Z]], [[W]]
+; AVX2-NEXT: call void @use(<4 x i1> [[C0]])
+; AVX2-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; AVX2-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
+; AVX2-NEXT: ret <4 x i32> [[R]]
+;
+; AVX512-LABEL: define <4 x i32> @shuf_icmp_ugt_v4i32_use(
+; AVX512-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[C0:%.*]] = icmp ugt <4 x i32> [[X]], [[Y]]
+; AVX512-NEXT: call void @use(<4 x i1> [[C0]])
+; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Z]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[W]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; AVX512-NEXT: [[S:%.*]] = icmp ugt <4 x i32> [[TMP1]], [[TMP2]]
+; AVX512-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
+; AVX512-NEXT: ret <4 x i32> [[R]]
;
%c0 = icmp ugt <4 x i32> %x, %y
%c1 = icmp ugt <4 x i32> %z, %w
>From d9787126199f48c759d134c1a1177d0d263ef0f5 Mon Sep 17 00:00:00 2001
From: raventid <juliankul at gmail.com>
Date: Tue, 3 Feb 2026 18:58:28 +0800
Subject: [PATCH 2/4] fix typo in a comment
---
llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll
index c044764b0ede9..30ba7bd6d03d1 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll
@@ -248,7 +248,7 @@ define <4 x i32> @shuf_icmp_eq_v4i64_v4i32_mismatch_type(<4 x i64> %x, <4 x i64>
ret <4 x i32> %r
}
-; Multi-use test - fold benaviour depends on enabled extensions
+; Multi-use test - folding depends on enabled extensions
define <4 x i32> @shuf_icmp_ugt_v4i32_use(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) {
; SSE-LABEL: define <4 x i32> @shuf_icmp_ugt_v4i32_use(
>From 0950f52344f06362f54ac4eff4bc6448f503fa5b Mon Sep 17 00:00:00 2001
From: raventid <juliankul at gmail.com>
Date: Wed, 4 Feb 2026 15:18:18 +0800
Subject: [PATCH 3/4] update cost computation logic with corner cases
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 13 +++++++++----
.../VectorCombine/AArch64/select-shuffle.ll | 3 +--
2 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 6ccd1b2a3b7c7..45fb0113d41fc 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2605,9 +2605,11 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
}
// Try to replace a binop with a shuffle if the shuffle is not costly.
+ // When LHS == RHS, only count the binop cost once.
InstructionCost OldCost =
TTI.getInstructionCost(LHS, CostKind) +
- TTI.getInstructionCost(RHS, CostKind) +
+ (LHS != RHS ? TTI.getInstructionCost(RHS, CostKind)
+ : InstructionCost(0)) +
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleDstTy,
BinResTy, OldMask, CostKind, 0, nullptr, {LHS, RHS},
&I);
@@ -2642,7 +2644,10 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0, CostKind);
ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1, CostKind);
bool SingleSrcBinOp = (X == Y) && (Z == W) && (NewMask0 == NewMask1);
- ReducedInstCount |= SingleSrcBinOp;
+ // SingleSrcBinOp only reduces instruction count if we also eliminate the
+ // original binop(s). If binops have multiple uses, they won't be eliminated.
+ ReducedInstCount |=
+ SingleSrcBinOp && LHS->hasOneUse() && (LHS == RHS || RHS->hasOneUse());
auto *ShuffleCmpTy =
FixedVectorType::get(BinOpTy->getElementType(), ShuffleDstTy);
@@ -2662,10 +2667,10 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
}
// If LHS/RHS have other uses, we need to account for the cost of keeping
- // the original instructions.
+ // the original instructions. When LHS == RHS, only add the cost once.
if (!LHS->hasOneUse())
NewCost += TTI.getInstructionCost(LHS, CostKind);
- if (!RHS->hasOneUse())
+ if (LHS != RHS && !RHS->hasOneUse())
NewCost += TTI.getInstructionCost(RHS, CostKind);
LLVM_DEBUG(dbgs() << "Found a shuffle feeding two binops: " << I
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll b/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll
index 367240e30d243..3a3ba74663b93 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll
@@ -408,8 +408,7 @@ define void @test_31(ptr %src, ptr %dst) {
; CHECK-NEXT: [[T0:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> [[ADD]], <4 x i32> <i32 0, i32 2, i32 5, i32 2>
; CHECK-NEXT: [[H0:%.*]] = getelementptr <4 x i32>, ptr [[DST:%.*]], i32 0
; CHECK-NEXT: store <4 x i32> [[T0]], ptr [[H0]], align 16
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[S1]], <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
-; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[TMP1]], [[TMP1]]
+; CHECK-NEXT: [[T1:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> [[ADD]], <4 x i32> <i32 6, i32 0, i32 1, i32 7>
; CHECK-NEXT: [[H1:%.*]] = getelementptr <4 x i32>, ptr [[DST]], i32 1
; CHECK-NEXT: store <4 x i32> [[T1]], ptr [[H1]], align 16
; CHECK-NEXT: ret void
>From 7e5b21300611d60cc0d0f3034575da8d89d96cb5 Mon Sep 17 00:00:00 2001
From: raventid <juliankul at gmail.com>
Date: Thu, 5 Feb 2026 17:08:07 +0800
Subject: [PATCH 4/4] fix weight computation in shuffling of binops
---
.../Transforms/Vectorize/VectorCombine.cpp | 28 ++++++++++---------
1 file changed, 15 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 45fb0113d41fc..7967cc7eef9a2 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2575,6 +2575,7 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
if (!ShuffleDstTy || !BinResTy || !BinOpTy || X->getType() != Z->getType())
return false;
+ bool SameBinOp = LHS == RHS;
unsigned NumSrcElts = BinOpTy->getNumElements();
// If we have something like "add X, Y" and "add Z, X", swap ops to match.
@@ -2605,12 +2606,15 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
}
// Try to replace a binop with a shuffle if the shuffle is not costly.
- // When LHS == RHS, only count the binop cost once.
- InstructionCost OldCost =
- TTI.getInstructionCost(LHS, CostKind) +
- (LHS != RHS ? TTI.getInstructionCost(RHS, CostKind)
- : InstructionCost(0)) +
- TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleDstTy,
+ // When SameBinOp, only count the binop cost once.
+ InstructionCost LHSCost = TTI.getInstructionCost(LHS, CostKind);
+ InstructionCost RHSCost = TTI.getInstructionCost(RHS, CostKind);
+
+ InstructionCost OldCost = LHSCost;
+ if (!SameBinOp) {
+ OldCost += RHSCost;
+ }
+ OldCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleDstTy,
BinResTy, OldMask, CostKind, 0, nullptr, {LHS, RHS},
&I);
@@ -2646,8 +2650,7 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
bool SingleSrcBinOp = (X == Y) && (Z == W) && (NewMask0 == NewMask1);
// SingleSrcBinOp only reduces instruction count if we also eliminate the
// original binop(s). If binops have multiple uses, they won't be eliminated.
- ReducedInstCount |=
- SingleSrcBinOp && LHS->hasOneUse() && (LHS == RHS || RHS->hasOneUse());
+ ReducedInstCount |= SingleSrcBinOp && LHS->hasOneUse() && RHS->hasOneUse();
auto *ShuffleCmpTy =
FixedVectorType::get(BinOpTy->getElementType(), ShuffleDstTy);
@@ -2665,13 +2668,12 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
TTI.getCmpSelInstrCost(LHS->getOpcode(), ShuffleCmpTy, ShuffleDstTy,
PredLHS, CostKind, Op0Info, Op1Info);
}
-
// If LHS/RHS have other uses, we need to account for the cost of keeping
- // the original instructions. When LHS == RHS, only add the cost once.
+ // the original instructions. When SameBinOp, only add the cost once.
if (!LHS->hasOneUse())
- NewCost += TTI.getInstructionCost(LHS, CostKind);
- if (LHS != RHS && !RHS->hasOneUse())
- NewCost += TTI.getInstructionCost(RHS, CostKind);
+ NewCost += LHSCost;
+ if (!SameBinOp && !RHS->hasOneUse())
+ NewCost += RHSCost;
LLVM_DEBUG(dbgs() << "Found a shuffle feeding two binops: " << I
<< "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
More information about the llvm-commits
mailing list