[llvm] [VectorCombine] Add foldShuffleToIdentity (PR #88693)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 15 00:56:40 PDT 2024
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/88693
This patch adds a basic version of a combine that attempts to remove shuffles that when combined simplify away to an identity shuffle. For example:
%ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%at = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
%abt = fneg <4 x half> %at
%abb = fneg <4 x half> %ab
%r = shufflevector <4 x half> %abt, <4 x half> %abb, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
By looking through the shuffles and fneg, it can be simplified to:
%r = fneg <8 x half> %a
The code tracks each lane starting from the original shuffle, keeping a track of a vector of {src, idx}. As we propagate up through the instructions we will either look through intermediate instructions (binops and unops) or see a collections of lanes that all have the same src and incrementing idx (an identity). We can also see a single value with identical lanes, which we can treat like a splat.
Only the basic version is added here, handling identities, splats, binops and unops. In follow-up patches other instructions can be added such as constants, intrinsics, cmp/sel and zext/sext/trunc.
>From 37dda86f51df0bddf5690b5509e2a347d30e8c6c Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 15 Apr 2024 08:34:26 +0100
Subject: [PATCH] [VectorCombine] Add foldShuffleToIdentity
This patch adds a basic version of a combine that attempts to fold away
shuffles that when combines simplify away to an identity shuffle.
For example:
%ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%at = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
%abt = fneg <4 x half> %at
%abb = fneg <4 x half> %ab
%r = shufflevector <4 x half> %abt, <4 x half> %abb, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
By looking through the shuffles, it can be simplified to:
%r = fneg <8 x half> %a
The code tracks each lane starting from the original shuffle, keeping a track
of a vector of {src, idx}. As we propagate up through the instructions we will
either look through intermediate instructions (binops and unops) or see a
collections of lanes that all have the same src and incrementing idx (an
identity). We can also see a single value with identical lanes, which we can
treat like a splat.
Only the basic version is added here, handling identites, splats, binops and
unops. In follow-up patches other instructions can be added such as constants,
intrinsics, cmp/sel and zext/sext/trunc.
---
.../Transforms/Vectorize/VectorCombine.cpp | 141 ++++++++++++++++++
.../AArch64/shuffletoidentity.ll | 134 +++--------------
.../Transforms/VectorCombine/X86/pr67803.ll | 5 +-
.../VectorCombine/X86/select-shuffle.ll | 3 +-
4 files changed, 165 insertions(+), 118 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index e0e2f50c89adad..12e37f67d8b264 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -113,6 +113,7 @@ class VectorCombine {
bool scalarizeLoadExtract(Instruction &I);
bool foldShuffleOfBinops(Instruction &I);
bool foldShuffleOfCastops(Instruction &I);
+ bool foldShuffleToIdentity(Instruction &I);
bool foldShuffleFromReductions(Instruction &I);
bool foldTruncFromReductions(Instruction &I);
bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
@@ -1547,6 +1548,145 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
return true;
}
+// Starting from a shuffle, look up through operands tracking the shuffled index
+// of each lane. If we can simplify away the shuffles to identities then
+// do so.
+bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
+ FixedVectorType *Ty = dyn_cast<FixedVectorType>(I.getType());
+ if (!Ty || !isa<Instruction>(I.getOperand(0)) ||
+ !isa<Instruction>(I.getOperand(1)))
+ return false;
+
+ using InstLane = std::pair<Value *, int>;
+
+ auto LookThroughShuffles = [](Value *V, int Lane) -> InstLane {
+ while (auto *SV = dyn_cast<ShuffleVectorInst>(V)) {
+ unsigned NumElts =
+ cast<FixedVectorType>(SV->getOperand(0)->getType())->getNumElements();
+ int M = SV->getMaskValue(Lane);
+ if (M < 0)
+ return {nullptr, -1};
+ else if (M < (int)NumElts) {
+ V = SV->getOperand(0);
+ Lane = M;
+ } else {
+ V = SV->getOperand(1);
+ Lane = M - NumElts;
+ }
+ }
+ return InstLane{V, Lane};
+ };
+
+ auto GenerateInstLaneVectorFromOperand =
+ [&LookThroughShuffles](const SmallVector<InstLane> &Item, int Op) {
+ SmallVector<InstLane> NItem;
+ for (InstLane V : Item) {
+ NItem.emplace_back(
+ !V.first
+ ? InstLane{nullptr, -1}
+ : LookThroughShuffles(
+ cast<Instruction>(V.first)->getOperand(Op), V.second));
+ }
+ return NItem;
+ };
+
+ SmallVector<InstLane> Start;
+ for (unsigned M = 0; M < Ty->getNumElements(); ++M)
+ Start.push_back(LookThroughShuffles(&I, M));
+
+ SmallVector<SmallVector<InstLane>> Worklist;
+ Worklist.push_back(Start);
+ SmallPtrSet<Value *, 4> IdentityLeafs, SplatLeafs;
+
+ while (!Worklist.empty()) {
+ SmallVector<InstLane> Item = Worklist.pop_back_val();
+
+ // If we found an undef first lane then bail out to keep things simple.
+ if (!Item[0].first)
+ return false;
+
+ // Look for an identity value.
+ if (Item[0].second == 0 && Item[0].first->getType() == Ty &&
+ all_of(drop_begin(enumerate(Item)), [&](const auto &E) {
+ return !E.value().first || (E.value().first == Item[0].first &&
+ E.value().second == (int)E.index());
+ })) {
+ IdentityLeafs.insert(Item[0].first);
+ continue;
+ }
+ // Look for a splat value.
+ if (all_of(drop_begin(Item), [&](InstLane &IL) {
+ return !IL.first ||
+ (IL.first == Item[0].first && IL.second == Item[0].second);
+ })) {
+ SplatLeafs.insert(Item[0].first);
+ continue;
+ }
+
+ // We need each element to be the same type of value, and check that each
+ // element has a single use.
+ if (!all_of(drop_begin(Item), [&](InstLane IL) {
+ if (!IL.first)
+ return true;
+ if (isa<Instruction>(IL.first) &&
+ !cast<Instruction>(IL.first)->hasOneUse())
+ return false;
+ return IL.first->getValueID() == Item[0].first->getValueID() &&
+ (!isa<IntrinsicInst>(IL.first) ||
+ cast<IntrinsicInst>(IL.first)->getIntrinsicID() ==
+ cast<IntrinsicInst>(Item[0].first)->getIntrinsicID());
+ }))
+ return false;
+
+ // Check the operator is one that we support.
+ if (isa<BinaryOperator>(Item[0].first)) {
+ Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
+ Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 1));
+ } else if (isa<UnaryOperator>(Item[0].first)) {
+ Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
+ } else {
+ return false;
+ }
+ }
+
+ // If we got this far, we know the shuffles are superfluous and can be
+ // removed. Scan through again and generate the new tree of instructions.
+ std::function<Value *(const SmallVector<InstLane> &)> generate =
+ [&](const SmallVector<InstLane> &Item) -> Value * {
+ if (IdentityLeafs.contains(Item[0].first) &&
+ all_of(drop_begin(enumerate(Item)), [&](const auto &E) {
+ return !E.value().first || (E.value().first == Item[0].first &&
+ E.value().second == (int)E.index());
+ })) {
+ return Item[0].first;
+ } else if (SplatLeafs.contains(Item[0].first)) {
+ if (auto ILI = dyn_cast<Instruction>(Item[0].first))
+ Builder.SetInsertPoint(*ILI->getInsertionPointAfterDef());
+ else if (isa<Argument>(Item[0].first))
+ Builder.SetInsertPointPastAllocas(I.getParent()->getParent());
+ SmallVector<int, 16> Mask(Ty->getNumElements(), Item[0].second);
+ return Builder.CreateShuffleVector(Item[0].first, Mask);
+ }
+
+ auto *I = cast<Instruction>(Item[0].first);
+ SmallVector<Value *> Ops;
+ unsigned E = I->getNumOperands();
+ for (unsigned Idx = 0; Idx < E; Idx++)
+ Ops.push_back(generate(GenerateInstLaneVectorFromOperand(Item, Idx)));
+ Builder.SetInsertPoint(I);
+ if (auto BI = dyn_cast<BinaryOperator>(I))
+ return Builder.CreateBinOp((Instruction::BinaryOps)BI->getOpcode(),
+ Ops[0], Ops[1]);
+ if (auto UI = dyn_cast<UnaryOperator>(I))
+ return Builder.CreateUnOp((Instruction::UnaryOps)UI->getOpcode(), Ops[0]);
+ llvm_unreachable("Unhandled instruction in generate");
+ };
+
+ Value *V = generate(Start);
+ replaceValue(I, *V);
+ return true;
+}
+
/// Given a commutative reduction, the order of the input lanes does not alter
/// the results. We can use this to remove certain shuffles feeding the
/// reduction, removing the need to shuffle at all.
@@ -2103,6 +2243,7 @@ bool VectorCombine::run() {
MadeChange |= foldShuffleOfBinops(I);
MadeChange |= foldShuffleOfCastops(I);
MadeChange |= foldSelectShuffle(I);
+ MadeChange |= foldShuffleToIdentity(I);
break;
case Instruction::BitCast:
MadeChange |= foldBitcastShuffle(I);
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
index d96dfec849167d..47f52a341df24f 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
@@ -5,10 +5,7 @@ target triple = "aarch64"
define <8 x i8> @trivial(<8 x i8> %a) {
; CHECK-LABEL: @trivial(
-; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[AT]], <4 x i8> [[AB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: ret <8 x i8> [[R]]
+; CHECK-NEXT: ret <8 x i8> [[R:%.*]]
;
%ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%at = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
@@ -18,13 +15,7 @@ define <8 x i8> @trivial(<8 x i8> %a) {
define <8 x i8> @add(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: @add(
-; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[ABT:%.*]] = add <4 x i8> [[AT]], [[BT]]
-; CHECK-NEXT: [[ABB:%.*]] = add <4 x i8> [[AB]], [[BB]]
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[R:%.*]] = add <8 x i8> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret <8 x i8> [[R]]
;
%ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -83,13 +74,7 @@ define <8 x i8> @wrong_lanes(<8 x i8> %a, <8 x i8> %b) {
define <8 x half> @fadd(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: @fadd(
-; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x half> [[B]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[ABT:%.*]] = fadd <4 x half> [[AT]], [[BT]]
-; CHECK-NEXT: [[ABB:%.*]] = fadd <4 x half> [[AB]], [[BB]]
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret <8 x half> [[R]]
;
%ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -104,11 +89,7 @@ define <8 x half> @fadd(<8 x half> %a, <8 x half> %b) {
define <8 x half> @fneg(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: @fneg(
-; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[ABT:%.*]] = fneg <4 x half> [[AT]]
-; CHECK-NEXT: [[ABB:%.*]] = fneg <4 x half> [[AB]]
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[R:%.*]] = fneg <8 x half> [[A:%.*]]
; CHECK-NEXT: ret <8 x half> [[R]]
;
%ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -138,12 +119,8 @@ define <8 x i8> @abs(<8 x i8> %a) {
define <8 x half> @splat0(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: @splat0(
-; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[BS:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[ABT:%.*]] = fadd <4 x half> [[AT]], [[BS]]
-; CHECK-NEXT: [[ABB:%.*]] = fadd <4 x half> [[AB]], [[BS]]
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[A:%.*]], [[TMP1]]
; CHECK-NEXT: ret <8 x half> [[R]]
;
%ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -157,12 +134,8 @@ define <8 x half> @splat0(<8 x half> %a, <8 x half> %b) {
define <8 x half> @splat2(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: @splat2(
-; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[BS:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEXT: [[ABT:%.*]] = fadd <4 x half> [[AT]], [[BS]]
-; CHECK-NEXT: [[ABB:%.*]] = fadd <4 x half> [[AB]], [[BS]]
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[A:%.*]], [[TMP1]]
; CHECK-NEXT: ret <8 x half> [[R]]
;
%ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -176,12 +149,8 @@ define <8 x half> @splat2(<8 x half> %a, <8 x half> %b) {
define <8 x half> @splatandidentity(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: @splatandidentity(
-; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[BS:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[ABT:%.*]] = fadd <4 x half> [[AT]], [[BS]]
-; CHECK-NEXT: [[ABB:%.*]] = fadd <4 x half> [[AB]], [[BS]]
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[A]], [[TMP1]]
; CHECK-NEXT: ret <8 x half> [[R]]
;
%ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -195,11 +164,9 @@ define <8 x half> @splatandidentity(<8 x half> %a, <8 x half> %b) {
define <8 x half> @splattwice(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: @splattwice(
-; CHECK-NEXT: [[AS:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[BS:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[AB1:%.*]] = fadd <4 x half> [[AS]], [[BS]]
-; CHECK-NEXT: [[AB2:%.*]] = fadd <4 x half> [[AS]], [[BS]]
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[AB1]], <4 x half> [[AB2]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[TMP2]], [[TMP1]]
; CHECK-NEXT: ret <8 x half> [[R]]
;
%as = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> zeroinitializer
@@ -212,13 +179,7 @@ define <8 x half> @splattwice(<8 x half> %a, <8 x half> %b) {
define <8 x i8> @undeflane(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: @undeflane(
-; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[ABT:%.*]] = add <4 x i8> [[AT]], [[BT]]
-; CHECK-NEXT: [[ABB:%.*]] = add <4 x i8> [[AB]], [[BB]]
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 poison, i32 1, i32 0>
+; CHECK-NEXT: [[R:%.*]] = add <8 x i8> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret <8 x i8> [[R]]
;
%ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -284,18 +245,9 @@ define <8 x i8> @constantdiff2(<8 x i8> %a) {
define <8 x i8> @inner_shuffle(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: @inner_shuffle(
-; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[CS:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[ABT:%.*]] = mul <4 x i8> [[AT]], [[BT]]
-; CHECK-NEXT: [[ABB:%.*]] = mul <4 x i8> [[AB]], [[BB]]
-; CHECK-NEXT: [[ABT2:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[ABB2:%.*]] = shufflevector <4 x i8> [[ABB]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[ABT3:%.*]] = add <4 x i8> [[ABT2]], [[CS]]
-; CHECK-NEXT: [[ABB3:%.*]] = add <4 x i8> [[ABB2]], [[CS]]
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT3]], <4 x i8> [[ABB3]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i8> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[R:%.*]] = add <8 x i8> [[TMP2]], [[TMP1]]
; CHECK-NEXT: ret <8 x i8> [[R]]
;
%ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -338,14 +290,9 @@ define <8 x i8> @extrause_add(<8 x i8> %a, <8 x i8> %b) {
define <8 x i8> @extrause_shuffle(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: @extrause_shuffle(
-; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B1:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[BT1:%.*]] = shufflevector <8 x i8> [[B1]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
+; CHECK-NEXT: [[BT1:%.*]] = shufflevector <8 x i8> [[B1:%.*]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
; CHECK-NEXT: call void @use(<4 x i8> [[BT1]])
-; CHECK-NEXT: [[ABT:%.*]] = add <4 x i8> [[BT]], [[BT1]]
-; CHECK-NEXT: [[ABB:%.*]] = add <4 x i8> [[AB]], [[BB]]
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[R:%.*]] = add <8 x i8> [[A:%.*]], [[B1]]
; CHECK-NEXT: ret <8 x i8> [[R]]
;
%ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -543,52 +490,15 @@ define void @v8f64interleave(i64 %0, ptr %1, ptr %x, double %z) {
; CHECK-LABEL: @v8f64interleave(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[Z:%.*]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x double>, ptr [[TMP1:%.*]], align 8
-; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 0, i32 8>
-; CHECK-NEXT: [[STRIDED_VEC27:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 1, i32 9>
-; CHECK-NEXT: [[STRIDED_VEC28:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 2, i32 10>
-; CHECK-NEXT: [[STRIDED_VEC29:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 3, i32 11>
-; CHECK-NEXT: [[STRIDED_VEC30:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 4, i32 12>
-; CHECK-NEXT: [[STRIDED_VEC31:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 5, i32 13>
-; CHECK-NEXT: [[STRIDED_VEC32:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 6, i32 14>
-; CHECK-NEXT: [[STRIDED_VEC33:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 7, i32 15>
-; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x double> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP4:%.*]] = fmul <16 x double> [[WIDE_VEC]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[X:%.*]], i64 [[TMP0:%.*]]
; CHECK-NEXT: [[WIDE_VEC34:%.*]] = load <16 x double>, ptr [[TMP3]], align 8
-; CHECK-NEXT: [[STRIDED_VEC35:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 0, i32 8>
-; CHECK-NEXT: [[STRIDED_VEC36:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 1, i32 9>
-; CHECK-NEXT: [[STRIDED_VEC37:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 2, i32 10>
-; CHECK-NEXT: [[STRIDED_VEC38:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 3, i32 11>
-; CHECK-NEXT: [[STRIDED_VEC39:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 4, i32 12>
-; CHECK-NEXT: [[STRIDED_VEC40:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 5, i32 13>
-; CHECK-NEXT: [[STRIDED_VEC41:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 6, i32 14>
-; CHECK-NEXT: [[STRIDED_VEC42:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 7, i32 15>
-; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <2 x double> [[STRIDED_VEC35]], [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x double> [[STRIDED_VEC27]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <2 x double> [[STRIDED_VEC36]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <2 x double> [[STRIDED_VEC28]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP8:%.*]] = fadd fast <2 x double> [[STRIDED_VEC37]], [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <2 x double> [[STRIDED_VEC29]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[STRIDED_VEC38]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = fmul fast <2 x double> [[STRIDED_VEC30]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP12:%.*]] = fadd fast <2 x double> [[STRIDED_VEC39]], [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = fmul fast <2 x double> [[STRIDED_VEC31]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP14:%.*]] = fadd fast <2 x double> [[STRIDED_VEC40]], [[TMP13]]
-; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <2 x double> [[STRIDED_VEC32]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP16:%.*]] = fadd fast <2 x double> [[STRIDED_VEC41]], [[TMP15]]
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd <16 x double> [[WIDE_VEC34]], [[TMP4]]
; CHECK-NEXT: [[TMP17:%.*]] = or disjoint i64 [[TMP0]], 7
-; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <2 x double> [[STRIDED_VEC33]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP17]]
-; CHECK-NEXT: [[TMP20:%.*]] = fadd fast <2 x double> [[STRIDED_VEC42]], [[TMP18]]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i64 -56
-; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x double> [[TMP12]], <2 x double> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <2 x double> [[TMP16]], <2 x double> [[TMP20]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x double> [[TMP22]], <4 x double> [[TMP23]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x double> [[TMP24]], <4 x double> [[TMP25]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[TMP26]], <8 x double> [[TMP27]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; CHECK-NEXT: store <16 x double> [[INTERLEAVED_VEC]], ptr [[TMP21]], align 8
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/VectorCombine/X86/pr67803.ll b/llvm/test/Transforms/VectorCombine/X86/pr67803.ll
index 69fd6f6a10e2a6..0277580d21fcb7 100644
--- a/llvm/test/Transforms/VectorCombine/X86/pr67803.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/pr67803.ll
@@ -6,10 +6,7 @@ define <4 x i64> @PR67803(<8 x i32> %x, <8 x i32> %y, <8 x float> %a, <8 x float
; CHECK-LABEL: @PR67803(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <8 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[CMP_LO:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[CMP_HI:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i1> [[CMP_LO]], <4 x i1> [[CMP_HI]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
; CHECK-NEXT: [[CONCAT:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64>
; CHECK-NEXT: [[MASK:%.*]] = bitcast <4 x i64> [[CONCAT]] to <8 x float>
; CHECK-NEXT: [[SEL:%.*]] = tail call noundef <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[MASK]])
diff --git a/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll b/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll
index 685d661ea6bcd2..a14995403cfacf 100644
--- a/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll
@@ -15,8 +15,7 @@ define <4 x double> @PR60649() {
; CHECK-NEXT: [[T0:%.*]] = phi <4 x double> [ zeroinitializer, [[ENTRY:%.*]] ], [ zeroinitializer, [[UNREACHABLE:%.*]] ]
; CHECK-NEXT: [[T1:%.*]] = phi <4 x double> [ zeroinitializer, [[ENTRY]] ], [ zeroinitializer, [[UNREACHABLE]] ]
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[T0]], <4 x double> [[T0]], <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[T0]], <4 x double> [[T0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP2:%.*]] = fdiv <4 x double> [[TMP1]], <double 0.000000e+00, double 0.000000e+00, double undef, double undef>
+; CHECK-NEXT: [[TMP2:%.*]] = fdiv <4 x double> [[T0]], <double 0.000000e+00, double 0.000000e+00, double undef, double undef>
; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x double> [[TMP0]], <double 0.000000e+00, double 0.000000e+00, double undef, double undef>
; CHECK-NEXT: [[T5:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> [[T5]]
More information about the llvm-commits
mailing list