[llvm] [VectorCombine] foldShuffleOfBinops - fold shuffle(binop(shuffle(x),shuffle(z)),binop(shuffle(y),shuffle(w)) -> binop(shuffle(x,z),shuffle(y,w)) (PR #120984)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 30 09:53:45 PST 2024
- Previous message: [llvm] [VectorCombine] foldShuffleOfBinops - fold shuffle(binop(shuffle(x),shuffle(z)),binop(shuffle(y),shuffle(w)) -> binop(shuffle(x,z),shuffle(y,w)) (PR #120984)
- Next message: [llvm] [VectorCombine] foldShuffleOfBinops - fold shuffle(binop(shuffle(x),shuffle(z)),binop(shuffle(y),shuffle(w)) -> binop(shuffle(x,z),shuffle(y,w)) (PR #120984)
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/120984
>From 1436446f1a006205ea25546e09354258c2b44bf2 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 23 Dec 2024 17:38:38 +0000
Subject: [PATCH 1/2] [VectorCombine] foldShuffleOfBinops - fold
shuffle(binop(shuffle(x),shuffle(z)),binop(shuffle(y),shuffle(w)) ->
binop(shuffle(x,z),shuffle(y,w))
Some patterns (in particular horizontal style patterns) can end up with shuffles straddling both sides of a binop/cmp.
Where individually the folds aren't worth it, by merging the (oneuse) shuffles we can notably reduce the net instruction count and cost.
One of the final steps towards finally addressing #34072
---
.../Transforms/Vectorize/VectorCombine.cpp | 34 +++-
.../test/Transforms/PhaseOrdering/X86/hadd.ll | 187 ++++++------------
.../Transforms/PhaseOrdering/X86/pr50392.ll | 9 +-
.../X86/extract-binop-inseltpoison.ll | 11 +-
4 files changed, 95 insertions(+), 146 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 4a0f014be0e75c6..259ca64de7aaa49 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1732,6 +1732,36 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, BinResTy,
OldMask, CostKind, 0, nullptr, {LHS, RHS}, &I);
+ // Handle shuffle(binop(shuffle(x),y),binop(z,shuffle(w))) style patterns
+ // where one use shuffles have gotten split across the binop/cmp. These
+ // often allow a major reduction in total cost that wouldn't happen as
+ // individual folds.
+ auto MergeInner = [&](Value *&Op, int Offset, MutableArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind) -> bool {
+ Value *InnerOp;
+ ArrayRef<int> InnerMask;
+ if (match(Op, m_OneUse(m_Shuffle(m_Value(InnerOp), m_Undef(),
+ m_Mask(InnerMask)))) &&
+ all_of(InnerMask,
+ [NumSrcElts](int M) { return M < (int)NumSrcElts; }) &&
+ InnerOp->getType() == Op->getType()) {
+ for (int &M : Mask)
+ if (Offset <= M && M < (int)(Offset + NumSrcElts)) {
+ M = InnerMask[M - Offset];
+ M = 0 <= M ? M + Offset : M;
+ }
+ OldCost += TTI.getInstructionCost(cast<Instruction>(Op), CostKind);
+ Op = InnerOp;
+ return true;
+ }
+ return false;
+ };
+ bool ReducedInstCount = false;
+ ReducedInstCount |= MergeInner(X, 0, NewMask0, CostKind);
+ ReducedInstCount |= MergeInner(Y, 0, NewMask1, CostKind);
+ ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0, CostKind);
+ ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1, CostKind);
+
InstructionCost NewCost =
TTI.getShuffleCost(SK0, BinOpTy, NewMask0, CostKind, 0, nullptr, {X, Z}) +
TTI.getShuffleCost(SK1, BinOpTy, NewMask1, CostKind, 0, nullptr, {Y, W});
@@ -1752,8 +1782,8 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
// If either shuffle will constant fold away, then fold for the same cost as
// we will reduce the instruction count.
- bool ReducedInstCount = (isa<Constant>(X) && isa<Constant>(Z)) ||
- (isa<Constant>(Y) && isa<Constant>(W));
+ ReducedInstCount |= (isa<Constant>(X) && isa<Constant>(Z)) ||
+ (isa<Constant>(Y) && isa<Constant>(W));
if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
return false;
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll b/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll
index 798824bce4dac2d..67da29b6cee7d87 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll
@@ -78,30 +78,16 @@ define <8 x i16> @add_v8i16_u1234567(<8 x i16> %a, <8 x i16> %b) {
; SSE2-NEXT: ret <8 x i16> [[RESULT]]
;
; SSE4-LABEL: @add_v8i16_u1234567(
-; SSE4-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT: [[TMP1:%.*]] = add <8 x i16> [[A]], [[SHIFT]]
-; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 4, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
-; SSE4-NEXT: [[HADD32:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP4]], <8 x i32> <i32 poison, i32 2, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i32> <i32 poison, i32 2, i32 5, i32 6, i32 8, i32 10, i32 12, i32 14>
+; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 poison, i32 3, i32 4, i32 7, i32 9, i32 11, i32 13, i32 15>
; SSE4-NEXT: [[TMP7:%.*]] = add <8 x i16> [[TMP5]], [[TMP6]]
-; SSE4-NEXT: [[RESULT:%.*]] = shufflevector <8 x i16> [[HADD32]], <8 x i16> [[TMP7]], <8 x i32> <i32 poison, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-; SSE4-NEXT: ret <8 x i16> [[RESULT]]
+; SSE4-NEXT: ret <8 x i16> [[TMP7]]
;
; AVX-LABEL: @add_v8i16_u1234567(
-; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT: [[TMP1:%.*]] = add <8 x i16> [[A]], [[SHIFT]]
-; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 4, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
-; AVX-NEXT: [[HADD32:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP4]], <8 x i32> <i32 poison, i32 2, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT: [[TMP6:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i32> <i32 poison, i32 2, i32 5, i32 6, i32 8, i32 10, i32 12, i32 14>
+; AVX-NEXT: [[TMP6:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 poison, i32 3, i32 4, i32 7, i32 9, i32 11, i32 13, i32 15>
; AVX-NEXT: [[TMP7:%.*]] = add <8 x i16> [[TMP5]], [[TMP6]]
-; AVX-NEXT: [[RESULT:%.*]] = shufflevector <8 x i16> [[HADD32]], <8 x i16> [[TMP7]], <8 x i32> <i32 poison, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-; AVX-NEXT: ret <8 x i16> [[RESULT]]
+; AVX-NEXT: ret <8 x i16> [[TMP7]]
;
%a0 = extractelement <8 x i16> %a, i32 0
%a1 = extractelement <8 x i16> %a, i32 1
@@ -172,13 +158,10 @@ define <4 x i32> @add_v4i32_0123(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @add_v4i32_u123(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @add_v4i32_u123(
-; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
-; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A]], [[SHIFT]]
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> <i32 0, i32 3, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 poison, i32 2, i32 5, i32 6>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 poison, i32 3, i32 4, i32 7>
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[RESULT1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP4]], <4 x i32> <i32 poison, i32 2, i32 4, i32 5>
-; CHECK-NEXT: ret <4 x i32> [[RESULT1]]
+; CHECK-NEXT: ret <4 x i32> [[TMP4]]
;
%a0 = extractelement <4 x i32> %a, i32 0
%a1 = extractelement <4 x i32> %a, i32 1
@@ -202,13 +185,10 @@ define <4 x i32> @add_v4i32_u123(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @add_v4i32_0u23(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @add_v4i32_0u23(
-; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A]], [[SHIFT]]
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> <i32 0, i32 3, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 5, i32 6>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 poison, i32 4, i32 7>
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[RESULT1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 poison, i32 4, i32 5>
-; CHECK-NEXT: ret <4 x i32> [[RESULT1]]
+; CHECK-NEXT: ret <4 x i32> [[TMP4]]
;
%a0 = extractelement <4 x i32> %a, i32 0
%a1 = extractelement <4 x i32> %a, i32 1
@@ -232,40 +212,28 @@ define <4 x i32> @add_v4i32_0u23(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @add_v4i32_01u3(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: @add_v4i32_01u3(
-; SSE2-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
-; SSE2-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A]], [[SHIFT]]
-; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B:%.*]], <4 x i32> <i32 2, i32 poison, i32 6, i32 poison>
-; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 3, i32 poison, i32 7, i32 poison>
+; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 poison, i32 6>
+; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 poison, i32 7>
; SSE2-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
-; SSE2-NEXT: [[RESULT1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 4, i32 poison, i32 6>
-; SSE2-NEXT: ret <4 x i32> [[RESULT1]]
+; SSE2-NEXT: ret <4 x i32> [[TMP4]]
;
; SSE4-LABEL: @add_v4i32_01u3(
-; SSE4-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
-; SSE4-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[B]]
-; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 poison, i32 poison>
-; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> <i32 0, i32 3, i32 poison, i32 poison>
+; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 1, i32 2, i32 poison, i32 6>
+; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 3, i32 poison, i32 7>
; SSE4-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
-; SSE4-NEXT: [[RESULT:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 7>
-; SSE4-NEXT: ret <4 x i32> [[RESULT]]
+; SSE4-NEXT: ret <4 x i32> [[TMP4]]
;
; AVX2-LABEL: @add_v4i32_01u3(
-; AVX2-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
-; AVX2-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[B]]
-; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 poison, i32 poison>
-; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> <i32 0, i32 3, i32 poison, i32 poison>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 1, i32 2, i32 poison, i32 6>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 3, i32 poison, i32 7>
; AVX2-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
-; AVX2-NEXT: [[RESULT:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 7>
-; AVX2-NEXT: ret <4 x i32> [[RESULT]]
+; AVX2-NEXT: ret <4 x i32> [[TMP4]]
;
; AVX512-LABEL: @add_v4i32_01u3(
-; AVX512-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
-; AVX512-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A]], [[SHIFT]]
-; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B:%.*]], <4 x i32> <i32 2, i32 poison, i32 6, i32 poison>
-; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 3, i32 poison, i32 7, i32 poison>
+; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 poison, i32 6>
+; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 poison, i32 7>
; AVX512-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
-; AVX512-NEXT: [[RESULT1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 4, i32 poison, i32 6>
-; AVX512-NEXT: ret <4 x i32> [[RESULT1]]
+; AVX512-NEXT: ret <4 x i32> [[TMP4]]
;
%a0 = extractelement <4 x i32> %a, i32 0
%a1 = extractelement <4 x i32> %a, i32 1
@@ -289,13 +257,10 @@ define <4 x i32> @add_v4i32_01u3(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @add_v4i32_012u(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @add_v4i32_012u(
-; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A]], [[SHIFT]]
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B:%.*]], <4 x i32> <i32 2, i32 4, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 3, i32 5, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 poison>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 poison>
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[RESULT1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 4, i32 5, i32 poison>
-; CHECK-NEXT: ret <4 x i32> [[RESULT1]]
+; CHECK-NEXT: ret <4 x i32> [[TMP4]]
;
%a0 = extractelement <4 x i32> %a, i32 0
%a1 = extractelement <4 x i32> %a, i32 1
@@ -420,17 +385,14 @@ define <8 x i32> @add_v8i32_01234567(<8 x i32> %a, <8 x i32> %b) {
define <8 x i32> @add_v8i32_01234u67(<8 x i32> %a, <8 x i32> %b) {
; SSE2-LABEL: @add_v8i32_01234u67(
-; SSE2-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 5, i32 poison, i32 poison, i32 poison>
-; SSE2-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A]], [[SHIFT]]
; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <2 x i32> <i32 5, i32 6>
; SSE2-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <2 x i32> <i32 4, i32 7>
; SSE2-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP5]], [[TMP6]]
-; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: [[TMP4:%.*]] = add <8 x i32> [[TMP2]], [[TMP3]]
-; SSE2-NEXT: [[HADD4:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE2-NEXT: [[RESULT:%.*]] = shufflevector <8 x i32> [[HADD4]], <8 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 poison, i32 8, i32 9>
+; SSE2-NEXT: [[RESULT:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 poison, i32 8, i32 9>
; SSE2-NEXT: ret <8 x i32> [[RESULT]]
;
; SSE4-LABEL: @add_v8i32_01234u67(
@@ -449,17 +411,10 @@ define <8 x i32> @add_v8i32_01234u67(<8 x i32> %a, <8 x i32> %b) {
; SSE4-NEXT: ret <8 x i32> [[RESULT]]
;
; AVX-LABEL: @add_v8i32_01234u67(
-; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 5, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A]], [[SHIFT]]
-; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT: [[TMP4:%.*]] = add <8 x i32> [[TMP2]], [[TMP3]]
-; AVX-NEXT: [[HADD4:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <8 x i32> <i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <8 x i32> <i32 4, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 poison, i32 13, i32 14>
+; AVX-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 poison, i32 12, i32 15>
; AVX-NEXT: [[TMP7:%.*]] = add <8 x i32> [[TMP5]], [[TMP6]]
-; AVX-NEXT: [[RESULT:%.*]] = shufflevector <8 x i32> [[HADD4]], <8 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 poison, i32 8, i32 9>
-; AVX-NEXT: ret <8 x i32> [[RESULT]]
+; AVX-NEXT: ret <8 x i32> [[TMP7]]
;
%a0 = extractelement <8 x i32> %a, i32 0
%a1 = extractelement <8 x i32> %a, i32 1
@@ -530,13 +485,10 @@ define <4 x float> @add_v4f32_0123(<4 x float> %a, <4 x float> %b) {
define <4 x float> @add_v4f32_u123(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @add_v4f32_u123(
-; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 2, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 poison, i32 2, i32 5, i32 6>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 poison, i32 3, i32 4, i32 7>
; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[RESULT1:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP4]], <4 x i32> <i32 poison, i32 2, i32 4, i32 5>
-; CHECK-NEXT: ret <4 x float> [[RESULT1]]
+; CHECK-NEXT: ret <4 x float> [[TMP4]]
;
%a0 = extractelement <4 x float> %a, i32 0
%a1 = extractelement <4 x float> %a, i32 1
@@ -599,22 +551,16 @@ define <4 x float> @add_v4f32_01u3(<4 x float> %a, <4 x float> %b) {
; SSE2-NEXT: ret <4 x float> [[RESULT1]]
;
; SSE4-LABEL: @add_v4f32_01u3(
-; SSE4-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
-; SSE4-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[B]]
-; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 2, i32 poison, i32 poison>
-; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 poison, i32 poison>
+; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 1, i32 2, i32 poison, i32 6>
+; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 3, i32 poison, i32 7>
; SSE4-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP2]], [[TMP3]]
-; SSE4-NEXT: [[RESULT:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 7>
-; SSE4-NEXT: ret <4 x float> [[RESULT]]
+; SSE4-NEXT: ret <4 x float> [[TMP4]]
;
; AVX2-LABEL: @add_v4f32_01u3(
-; AVX2-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
-; AVX2-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[B]]
-; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 2, i32 poison, i32 poison>
-; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 poison, i32 poison>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 1, i32 2, i32 poison, i32 6>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 3, i32 poison, i32 7>
; AVX2-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP2]], [[TMP3]]
-; AVX2-NEXT: [[RESULT:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 7>
-; AVX2-NEXT: ret <4 x float> [[RESULT]]
+; AVX2-NEXT: ret <4 x float> [[TMP4]]
;
; AVX512-LABEL: @add_v4f32_01u3(
; AVX512-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
@@ -820,17 +766,10 @@ define <8 x float> @add_v8f32_012u4567(<8 x float> %a, <8 x float> %b) {
; SSE-NEXT: ret <8 x float> [[RESULT]]
;
; AVX-LABEL: @add_v8f32_012u4567(
-; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 7, i32 poison>
-; AVX-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A]], [[SHIFT]]
-; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 poison, i32 4, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 poison, i32 5, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT: [[TMP4:%.*]] = fadd <8 x float> [[TMP2]], [[TMP3]]
-; AVX-NEXT: [[HADD5:%.*]] = shufflevector <8 x float> [[TMP4]], <8 x float> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 poison, i32 4, i32 14, i32 poison, i32 poison>
-; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <8 x i32> <i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <8 x i32> <i32 4, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 poison, i32 4, i32 6, i32 13, i32 14>
+; AVX-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 poison, i32 5, i32 7, i32 12, i32 15>
; AVX-NEXT: [[TMP7:%.*]] = fadd <8 x float> [[TMP5]], [[TMP6]]
-; AVX-NEXT: [[RESULT:%.*]] = shufflevector <8 x float> [[HADD5]], <8 x float> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 poison, i32 4, i32 5, i32 8, i32 9>
-; AVX-NEXT: ret <8 x float> [[RESULT]]
+; AVX-NEXT: ret <8 x float> [[TMP7]]
;
%a0 = extractelement <8 x float> %a, i32 0
%a1 = extractelement <8 x float> %a, i32 1
@@ -983,13 +922,10 @@ define <4 x double> @add_v4f64_u123(<4 x double> %a, <4 x double> %b) {
; SSE4-NEXT: ret <4 x double> [[RESULT]]
;
; AVX-LABEL: @add_v4f64_u123(
-; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
-; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x double> [[B]], [[SHIFT]]
-; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[A:%.*]], <4 x i32> <i32 poison, i32 0, i32 6, i32 poison>
-; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[A]], <4 x i32> <i32 poison, i32 1, i32 7, i32 poison>
+; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> [[A:%.*]], <4 x i32> <i32 poison, i32 0, i32 6, i32 2>
+; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[A]], <4 x i32> <i32 poison, i32 1, i32 7, i32 3>
; AVX-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP2]], [[TMP3]]
-; AVX-NEXT: [[RESULT:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> [[TMP1]], <4 x i32> <i32 poison, i32 1, i32 2, i32 6>
-; AVX-NEXT: ret <4 x double> [[RESULT]]
+; AVX-NEXT: ret <4 x double> [[TMP4]]
;
%a0 = extractelement <4 x double> %a, i32 0
%a1 = extractelement <4 x double> %a, i32 1
@@ -1034,13 +970,10 @@ define <4 x double> @add_v4f64_0u23(<4 x double> %a, <4 x double> %b) {
; SSE4-NEXT: ret <4 x double> [[RESULT]]
;
; AVX-LABEL: @add_v4f64_0u23(
-; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
-; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x double> [[B]], [[SHIFT]]
-; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> poison, <4 x i32> <i32 1, i32 poison, i32 2, i32 poison>
-; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 0, i32 poison, i32 3, i32 poison>
+; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 1, i32 poison, i32 2, i32 6>
+; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 0, i32 poison, i32 3, i32 7>
; AVX-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP2]], [[TMP3]]
-; AVX-NEXT: [[RESULT:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> [[TMP1]], <4 x i32> <i32 0, i32 poison, i32 2, i32 6>
-; AVX-NEXT: ret <4 x double> [[RESULT]]
+; AVX-NEXT: ret <4 x double> [[TMP4]]
;
%a0 = extractelement <4 x double> %a, i32 0
%a1 = extractelement <4 x double> %a, i32 1
@@ -1085,13 +1018,10 @@ define <4 x double> @add_v4f64_01u3(<4 x double> %a, <4 x double> %b) {
; SSE4-NEXT: ret <4 x double> [[RESULT]]
;
; AVX-LABEL: @add_v4f64_01u3(
-; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
-; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x double> [[B]], [[SHIFT]]
-; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B]], <4 x i32> <i32 0, i32 4, i32 poison, i32 poison>
-; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 poison, i32 poison>
+; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 poison, i32 6>
+; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 poison, i32 7>
; AVX-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP2]], [[TMP3]]
-; AVX-NEXT: [[RESULT:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 6>
-; AVX-NEXT: ret <4 x double> [[RESULT]]
+; AVX-NEXT: ret <4 x double> [[TMP4]]
;
%a0 = extractelement <4 x double> %a, i32 0
%a1 = extractelement <4 x double> %a, i32 1
@@ -1136,13 +1066,10 @@ define <4 x double> @add_v4f64_012u(<4 x double> %a, <4 x double> %b) {
; SSE4-NEXT: ret <4 x double> [[RESULT]]
;
; AVX-LABEL: @add_v4f64_012u(
-; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
-; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x double> [[A]], [[SHIFT]]
-; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 poison, i32 poison>
-; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 poison, i32 poison>
+; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 poison>
+; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 poison>
; AVX-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP2]], [[TMP3]]
-; AVX-NEXT: [[RESULT:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 6, i32 poison>
-; AVX-NEXT: ret <4 x double> [[RESULT]]
+; AVX-NEXT: ret <4 x double> [[TMP4]]
;
%a0 = extractelement <4 x double> %a, i32 0
%a1 = extractelement <4 x double> %a, i32 1
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll
index 4e1051d1991aa8d..d92df9741644b7d 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll
@@ -31,13 +31,10 @@ define <4 x double> @PR50392(<4 x double> %a, <4 x double> %b) {
; SSE4-NEXT: ret <4 x double> [[SHUFFLE]]
;
; AVX-LABEL: @PR50392(
-; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 4, i32 poison>
-; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 5, i32 poison>
-; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
-; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
+; AVX-NEXT: [[B:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B1:%.*]], <4 x i32> <i32 0, i32 poison, i32 4, i32 6>
+; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B1]], <4 x i32> <i32 1, i32 poison, i32 5, i32 7>
; AVX-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[B]], [[SHIFT]]
-; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 poison, i32 2, i32 6>
-; AVX-NEXT: ret <4 x double> [[SHUFFLE]]
+; AVX-NEXT: ret <4 x double> [[TMP4]]
;
%vecext = extractelement <4 x double> %a, i32 0
%vecext1 = extractelement <4 x double> %a, i32 1
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
index 800f57646a3e13f..6ef18e66d4211fb 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
@@ -468,15 +468,10 @@ define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: @PR34724(
; SSE-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
; SSE-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
-; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
-; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
-; SSE-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
-; SSE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
-; SSE-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
+; SSE-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B1:%.*]], <4 x i32> <i32 poison, i32 2, i32 4, i32 6>
+; SSE-NEXT: [[B:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B1]], <4 x i32> <i32 poison, i32 3, i32 5, i32 7>
; SSE-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
-; SSE-NEXT: [[V2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> <i32 poison, i32 2, i32 4, i32 poison>
-; SSE-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-; SSE-NEXT: ret <4 x float> [[V3]]
+; SSE-NEXT: ret <4 x float> [[TMP3]]
;
; AVX-LABEL: @PR34724(
; AVX-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
>From a7d3934f9991a423581f7ff1009cf6f221ebef94 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Sat, 28 Dec 2024 11:34:42 +0000
Subject: [PATCH 2/2] Move Type equivalence earlier (cheap checks first)
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 259ca64de7aaa49..66bf0f271042c67 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1742,9 +1742,9 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
ArrayRef<int> InnerMask;
if (match(Op, m_OneUse(m_Shuffle(m_Value(InnerOp), m_Undef(),
m_Mask(InnerMask)))) &&
+ InnerOp->getType() == Op->getType() &&
all_of(InnerMask,
- [NumSrcElts](int M) { return M < (int)NumSrcElts; }) &&
- InnerOp->getType() == Op->getType()) {
+ [NumSrcElts](int M) { return M < (int)NumSrcElts; })) {
for (int &M : Mask)
if (Offset <= M && M < (int)(Offset + NumSrcElts)) {
M = InnerMask[M - Offset];
- Previous message: [llvm] [VectorCombine] foldShuffleOfBinops - fold shuffle(binop(shuffle(x),shuffle(z)),binop(shuffle(y),shuffle(w)) -> binop(shuffle(x,z),shuffle(y,w)) (PR #120984)
- Next message: [llvm] [VectorCombine] foldShuffleOfBinops - fold shuffle(binop(shuffle(x),shuffle(z)),binop(shuffle(y),shuffle(w)) -> binop(shuffle(x,z),shuffle(y,w)) (PR #120984)
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the llvm-commits
mailing list