[llvm] [VectorCombine] foldShuffleOfBinops - support multiple uses of shuffled binops (PR #179429)

Julian Pokrovsky via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 5 01:08:34 PST 2026


https://github.com/raventid updated https://github.com/llvm/llvm-project/pull/179429

>From 444b1fd782554dc84a2b1abd03352ce0abc79092 Mon Sep 17 00:00:00 2001
From: raventid <juliankul at gmail.com>
Date: Tue, 3 Feb 2026 18:50:53 +0800
Subject: [PATCH 1/4] [VectorCombine] foldShuffleOfBinops - support multiple
 uses of shuffled binops

Resolves https://github.com/llvm/llvm-project/issues/173035
---
 .../Transforms/Vectorize/VectorCombine.cpp    | 11 +++-
 .../VectorCombine/AArch64/select-shuffle.ll   |  3 +-
 .../VectorCombine/X86/shuffle-of-binops.ll    | 52 +++++++++++++++++++
 .../VectorCombine/X86/shuffle-of-cmps.ll      | 37 +++++++++----
 4 files changed, 91 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index b3295576eb73e..6ccd1b2a3b7c7 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2543,8 +2543,8 @@ bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
 bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
   ArrayRef<int> OldMask;
   Instruction *LHS, *RHS;
-  if (!match(&I, m_Shuffle(m_OneUse(m_Instruction(LHS)),
-                           m_OneUse(m_Instruction(RHS)), m_Mask(OldMask))))
+  if (!match(&I, m_Shuffle(m_Instruction(LHS), m_Instruction(RHS),
+                           m_Mask(OldMask))))
     return false;
 
   // TODO: Add support for addlike etc.
@@ -2661,6 +2661,13 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
                                PredLHS, CostKind, Op0Info, Op1Info);
   }
 
+  // If LHS/RHS have other uses, we need to account for the cost of keeping
+  // the original instructions.
+  if (!LHS->hasOneUse())
+    NewCost += TTI.getInstructionCost(LHS, CostKind);
+  if (!RHS->hasOneUse())
+    NewCost += TTI.getInstructionCost(RHS, CostKind);
+
   LLVM_DEBUG(dbgs() << "Found a shuffle feeding two binops: " << I
                     << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                     << "\n");
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll b/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll
index 3a3ba74663b93..367240e30d243 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll
@@ -408,7 +408,8 @@ define void @test_31(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    [[T0:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> [[ADD]], <4 x i32> <i32 0, i32 2, i32 5, i32 2>
 ; CHECK-NEXT:    [[H0:%.*]] = getelementptr <4 x i32>, ptr [[DST:%.*]], i32 0
 ; CHECK-NEXT:    store <4 x i32> [[T0]], ptr [[H0]], align 16
-; CHECK-NEXT:    [[T1:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> [[ADD]], <4 x i32> <i32 6, i32 0, i32 1, i32 7>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[S1]], <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    [[H1:%.*]] = getelementptr <4 x i32>, ptr [[DST]], i32 1
 ; CHECK-NEXT:    store <4 x i32> [[T1]], ptr [[H1]], align 16
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll
index 0cceab506bb75..e1e9501968043 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll
@@ -197,6 +197,58 @@ define <4 x i32> @shuf_mul_v4i32_yy_use2(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
   ret <4 x i32> %r
 }
 
+; Multi-use tests
+
+define <4 x i32> @shuf_sdiv_v4i32_multiuse_lhs(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @shuf_sdiv_v4i32_multiuse_lhs(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[B0:%.*]] = sdiv <4 x i32> [[X]], [[Y]]
+; CHECK-NEXT:    call void @use(<4 x i32> [[B0]])
+; CHECK-NEXT:    [[B1:%.*]] = sdiv <4 x i32> [[Z]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    ret <4 x i32> [[R]]
+;
+  %b0 = sdiv <4 x i32> %x, %y
+  call void @use(<4 x i32> %b0)
+  %b1 = sdiv <4 x i32> %z, %y
+  %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @shuf_sdiv_v4i32_multiuse_rhs(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @shuf_sdiv_v4i32_multiuse_rhs(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[B0:%.*]] = sdiv <4 x i32> [[X]], [[Y]]
+; CHECK-NEXT:    [[B1:%.*]] = sdiv <4 x i32> [[Z]], [[Y]]
+; CHECK-NEXT:    call void @use(<4 x i32> [[B1]])
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    ret <4 x i32> [[R]]
+;
+  %b0 = sdiv <4 x i32> %x, %y
+  %b1 = sdiv <4 x i32> %z, %y
+  call void @use(<4 x i32> %b1)
+  %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @shuf_sdiv_v4i32_multiuse_both(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @shuf_sdiv_v4i32_multiuse_both(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[B0:%.*]] = sdiv <4 x i32> [[X]], [[Y]]
+; CHECK-NEXT:    [[B1:%.*]] = sdiv <4 x i32> [[Z]], [[Y]]
+; CHECK-NEXT:    call void @use(<4 x i32> [[B0]])
+; CHECK-NEXT:    call void @use(<4 x i32> [[B1]])
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    ret <4 x i32> [[R]]
+;
+  %b0 = sdiv <4 x i32> %x, %y
+  %b1 = sdiv <4 x i32> %z, %y
+  call void @use(<4 x i32> %b0)
+  call void @use(<4 x i32> %b1)
+  %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i32> %r
+}
+
 ; non-matching operands (not commutable)
 
 define <4 x float> @shuf_fdiv_v4f32_no_common_op(<4 x float> %x, <4 x float> %y, <4 x float> %z, <4 x float> %w) {
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll
index f9108efa7ee79..c044764b0ede9 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll
@@ -248,17 +248,36 @@ define <4 x i32> @shuf_icmp_eq_v4i64_v4i32_mismatch_type(<4 x i64> %x, <4 x i64>
   ret <4 x i32> %r
 }
 
-; negative test - uses
+; Multi-use test - fold benaviour depends on enabled extensions
 
 define <4 x i32> @shuf_icmp_ugt_v4i32_use(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) {
-; CHECK-LABEL: define <4 x i32> @shuf_icmp_ugt_v4i32_use(
-; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[C0:%.*]] = icmp ugt <4 x i32> [[X]], [[Y]]
-; CHECK-NEXT:    [[C1:%.*]] = icmp ugt <4 x i32> [[Z]], [[W]]
-; CHECK-NEXT:    call void @use(<4 x i1> [[C0]])
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
-; CHECK-NEXT:    ret <4 x i32> [[R]]
+; SSE-LABEL: define <4 x i32> @shuf_icmp_ugt_v4i32_use(
+; SSE-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[C0:%.*]] = icmp ugt <4 x i32> [[X]], [[Y]]
+; SSE-NEXT:    [[C1:%.*]] = icmp ugt <4 x i32> [[Z]], [[W]]
+; SSE-NEXT:    call void @use(<4 x i1> [[C0]])
+; SSE-NEXT:    [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; SSE-NEXT:    [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
+; SSE-NEXT:    ret <4 x i32> [[R]]
+;
+; AVX2-LABEL: define <4 x i32> @shuf_icmp_ugt_v4i32_use(
+; AVX2-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[C0:%.*]] = icmp ugt <4 x i32> [[X]], [[Y]]
+; AVX2-NEXT:    [[C1:%.*]] = icmp ugt <4 x i32> [[Z]], [[W]]
+; AVX2-NEXT:    call void @use(<4 x i1> [[C0]])
+; AVX2-NEXT:    [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; AVX2-NEXT:    [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
+; AVX2-NEXT:    ret <4 x i32> [[R]]
+;
+; AVX512-LABEL: define <4 x i32> @shuf_icmp_ugt_v4i32_use(
+; AVX512-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[C0:%.*]] = icmp ugt <4 x i32> [[X]], [[Y]]
+; AVX512-NEXT:    call void @use(<4 x i1> [[C0]])
+; AVX512-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Z]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; AVX512-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[W]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; AVX512-NEXT:    [[S:%.*]] = icmp ugt <4 x i32> [[TMP1]], [[TMP2]]
+; AVX512-NEXT:    [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
+; AVX512-NEXT:    ret <4 x i32> [[R]]
 ;
   %c0 = icmp ugt <4 x i32> %x, %y
   %c1 = icmp ugt <4 x i32> %z, %w

>From d9787126199f48c759d134c1a1177d0d263ef0f5 Mon Sep 17 00:00:00 2001
From: raventid <juliankul at gmail.com>
Date: Tue, 3 Feb 2026 18:58:28 +0800
Subject: [PATCH 2/4] fix typo in a comment

---
 llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll
index c044764b0ede9..30ba7bd6d03d1 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll
@@ -248,7 +248,7 @@ define <4 x i32> @shuf_icmp_eq_v4i64_v4i32_mismatch_type(<4 x i64> %x, <4 x i64>
   ret <4 x i32> %r
 }
 
-; Multi-use test - fold benaviour depends on enabled extensions
+; Multi-use test - folding depends on enabled extensions
 
 define <4 x i32> @shuf_icmp_ugt_v4i32_use(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) {
 ; SSE-LABEL: define <4 x i32> @shuf_icmp_ugt_v4i32_use(

>From 0950f52344f06362f54ac4eff4bc6448f503fa5b Mon Sep 17 00:00:00 2001
From: raventid <juliankul at gmail.com>
Date: Wed, 4 Feb 2026 15:18:18 +0800
Subject: [PATCH 3/4] update cost computation logic with corner cases

---
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp     | 13 +++++++++----
 .../VectorCombine/AArch64/select-shuffle.ll         |  3 +--
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 6ccd1b2a3b7c7..45fb0113d41fc 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2605,9 +2605,11 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
   }
 
   // Try to replace a binop with a shuffle if the shuffle is not costly.
+  // When LHS == RHS, only count the binop cost once.
   InstructionCost OldCost =
       TTI.getInstructionCost(LHS, CostKind) +
-      TTI.getInstructionCost(RHS, CostKind) +
+      (LHS != RHS ? TTI.getInstructionCost(RHS, CostKind)
+                  : InstructionCost(0)) +
       TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleDstTy,
                          BinResTy, OldMask, CostKind, 0, nullptr, {LHS, RHS},
                          &I);
@@ -2642,7 +2644,10 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
   ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0, CostKind);
   ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1, CostKind);
   bool SingleSrcBinOp = (X == Y) && (Z == W) && (NewMask0 == NewMask1);
-  ReducedInstCount |= SingleSrcBinOp;
+  // SingleSrcBinOp only reduces instruction count if we also eliminate the
+  // original binop(s). If binops have multiple uses, they won't be eliminated.
+  ReducedInstCount |=
+      SingleSrcBinOp && LHS->hasOneUse() && (LHS == RHS || RHS->hasOneUse());
 
   auto *ShuffleCmpTy =
       FixedVectorType::get(BinOpTy->getElementType(), ShuffleDstTy);
@@ -2662,10 +2667,10 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
   }
 
   // If LHS/RHS have other uses, we need to account for the cost of keeping
-  // the original instructions.
+  // the original instructions. When LHS == RHS, only add the cost once.
   if (!LHS->hasOneUse())
     NewCost += TTI.getInstructionCost(LHS, CostKind);
-  if (!RHS->hasOneUse())
+  if (LHS != RHS && !RHS->hasOneUse())
     NewCost += TTI.getInstructionCost(RHS, CostKind);
 
   LLVM_DEBUG(dbgs() << "Found a shuffle feeding two binops: " << I
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll b/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll
index 367240e30d243..3a3ba74663b93 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll
@@ -408,8 +408,7 @@ define void @test_31(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    [[T0:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> [[ADD]], <4 x i32> <i32 0, i32 2, i32 5, i32 2>
 ; CHECK-NEXT:    [[H0:%.*]] = getelementptr <4 x i32>, ptr [[DST:%.*]], i32 0
 ; CHECK-NEXT:    store <4 x i32> [[T0]], ptr [[H0]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[S1]], <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
-; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[TMP1]], [[TMP1]]
+; CHECK-NEXT:    [[T1:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> [[ADD]], <4 x i32> <i32 6, i32 0, i32 1, i32 7>
 ; CHECK-NEXT:    [[H1:%.*]] = getelementptr <4 x i32>, ptr [[DST]], i32 1
 ; CHECK-NEXT:    store <4 x i32> [[T1]], ptr [[H1]], align 16
 ; CHECK-NEXT:    ret void

>From 7e5b21300611d60cc0d0f3034575da8d89d96cb5 Mon Sep 17 00:00:00 2001
From: raventid <juliankul at gmail.com>
Date: Thu, 5 Feb 2026 17:08:07 +0800
Subject: [PATCH 4/4] fix weight computation in shuffling of binops

---
 .../Transforms/Vectorize/VectorCombine.cpp    | 28 ++++++++++---------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 45fb0113d41fc..7967cc7eef9a2 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2575,6 +2575,7 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
   if (!ShuffleDstTy || !BinResTy || !BinOpTy || X->getType() != Z->getType())
     return false;
 
+  bool SameBinOp = LHS == RHS;
   unsigned NumSrcElts = BinOpTy->getNumElements();
 
   // If we have something like "add X, Y" and "add Z, X", swap ops to match.
@@ -2605,12 +2606,15 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
   }
 
   // Try to replace a binop with a shuffle if the shuffle is not costly.
-  // When LHS == RHS, only count the binop cost once.
-  InstructionCost OldCost =
-      TTI.getInstructionCost(LHS, CostKind) +
-      (LHS != RHS ? TTI.getInstructionCost(RHS, CostKind)
-                  : InstructionCost(0)) +
-      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleDstTy,
+  // When SameBinOp, only count the binop cost once.
+  InstructionCost LHSCost = TTI.getInstructionCost(LHS, CostKind);
+  InstructionCost RHSCost = TTI.getInstructionCost(RHS, CostKind);
+  
+  InstructionCost OldCost = LHSCost;
+  if (!SameBinOp) {
+    OldCost += RHSCost;
+  }
+  OldCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleDstTy,
                          BinResTy, OldMask, CostKind, 0, nullptr, {LHS, RHS},
                          &I);
 
@@ -2646,8 +2650,7 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
   bool SingleSrcBinOp = (X == Y) && (Z == W) && (NewMask0 == NewMask1);
   // SingleSrcBinOp only reduces instruction count if we also eliminate the
   // original binop(s). If binops have multiple uses, they won't be eliminated.
-  ReducedInstCount |=
-      SingleSrcBinOp && LHS->hasOneUse() && (LHS == RHS || RHS->hasOneUse());
+  ReducedInstCount |= SingleSrcBinOp && LHS->hasOneUse() && RHS->hasOneUse();
 
   auto *ShuffleCmpTy =
       FixedVectorType::get(BinOpTy->getElementType(), ShuffleDstTy);
@@ -2665,13 +2668,12 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
         TTI.getCmpSelInstrCost(LHS->getOpcode(), ShuffleCmpTy, ShuffleDstTy,
                                PredLHS, CostKind, Op0Info, Op1Info);
   }
-
   // If LHS/RHS have other uses, we need to account for the cost of keeping
-  // the original instructions. When LHS == RHS, only add the cost once.
+  // the original instructions. When SameBinOp, only add the cost once.
   if (!LHS->hasOneUse())
-    NewCost += TTI.getInstructionCost(LHS, CostKind);
-  if (LHS != RHS && !RHS->hasOneUse())
-    NewCost += TTI.getInstructionCost(RHS, CostKind);
+    NewCost += LHSCost;
+  if (!SameBinOp && !RHS->hasOneUse())
+    NewCost += RHSCost;
 
   LLVM_DEBUG(dbgs() << "Found a shuffle feeding two binops: " << I
                     << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost



More information about the llvm-commits mailing list