[llvm] [VectorCombine] Add foldShuffleToIdentity (PR #88693)

David Green via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 22 00:42:14 PDT 2024


https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/88693

>From a1093262605f8481df4016d8da2f01ea781c0619 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Sun, 21 Apr 2024 15:03:47 +0100
Subject: [PATCH 1/3] [VectorCombine] Add foldShuffleToIdentity

This patch adds a basic version of a combine that attempts to fold away
shuffles that when combines simplify away to an identity shuffle.
For example:
  %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %at = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
  %abt = fneg <4 x half> %at
  %abb = fneg <4 x half> %ab
  %r = shufflevector <4 x half> %abt, <4 x half> %abb, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
By looking through the shuffles, it can be simplified to:
  %r = fneg <8 x half> %a

The code tracks each lane starting from the original shuffle, keeping a track
of a vector of {src, idx}. As we propagate up through the instructions we will
either look through intermediate instructions (binops and unops) or see a
collections of lanes that all have the same src and incrementing idx (an
identity). We can also see a single value with identical lanes, which we can
treat like a splat.

Only the basic version is added here, handling identites, splats, binops and
unops. In follow-up patches other instructions can be added such as constants,
intrinsics, cmp/sel and zext/sext/trunc.
---
 .../Transforms/Vectorize/VectorCombine.cpp    | 144 +++++++++++++++++
 .../AArch64/shuffletoidentity.ll              | 150 ++++--------------
 .../Transforms/VectorCombine/X86/pr67803.ll   |   5 +-
 .../VectorCombine/X86/select-shuffle.ll       |   3 +-
 .../VectorCombine/X86/shuffle-of-shuffles.ll  |   5 +-
 5 files changed, 177 insertions(+), 130 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 4918cee1fa82a3..d4f23373f562b4 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -113,6 +113,7 @@ class VectorCombine {
   bool scalarizeLoadExtract(Instruction &I);
   bool foldShuffleOfBinops(Instruction &I);
   bool foldShuffleOfCastops(Instruction &I);
+  bool foldShuffleToIdentity(Instruction &I);
   bool foldShuffleFromReductions(Instruction &I);
   bool foldTruncFromReductions(Instruction &I);
   bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
@@ -1547,6 +1548,148 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
   return true;
 }
 
+// Starting from a shuffle, look up through operands tracking the shuffled index
+// of each lane. If we can simplify away the shuffles to identities then
+// do so.
+bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
+  FixedVectorType *Ty = dyn_cast<FixedVectorType>(I.getType());
+  if (!Ty || !isa<Instruction>(I.getOperand(0)) ||
+      !isa<Instruction>(I.getOperand(1)))
+    return false;
+
+  using InstLane = std::pair<Value *, int>;
+
+  auto LookThroughShuffles = [](Value *V, int Lane) -> InstLane {
+    while (auto *SV = dyn_cast<ShuffleVectorInst>(V)) {
+      unsigned NumElts =
+          cast<FixedVectorType>(SV->getOperand(0)->getType())->getNumElements();
+      int M = SV->getMaskValue(Lane);
+      if (M < 0)
+        return {nullptr, -1};
+      else if (M < (int)NumElts) {
+        V = SV->getOperand(0);
+        Lane = M;
+      } else {
+        V = SV->getOperand(1);
+        Lane = M - NumElts;
+      }
+    }
+    return InstLane{V, Lane};
+  };
+
+  auto GenerateInstLaneVectorFromOperand =
+      [&LookThroughShuffles](const SmallVector<InstLane> &Item, int Op) {
+        SmallVector<InstLane> NItem;
+        for (InstLane V : Item) {
+          NItem.emplace_back(
+              !V.first
+                  ? InstLane{nullptr, -1}
+                  : LookThroughShuffles(
+                        cast<Instruction>(V.first)->getOperand(Op), V.second));
+        }
+        return NItem;
+      };
+
+  SmallVector<InstLane> Start;
+  for (unsigned M = 0; M < Ty->getNumElements(); ++M)
+    Start.push_back(LookThroughShuffles(&I, M));
+
+  SmallVector<SmallVector<InstLane>> Worklist;
+  Worklist.push_back(Start);
+  SmallPtrSet<Value *, 4> IdentityLeafs, SplatLeafs;
+  unsigned NumVisited = 0;
+
+  while (!Worklist.empty()) {
+    SmallVector<InstLane> Item = Worklist.pop_back_val();
+    if (++NumVisited > MaxInstrsToScan)
+      return false;
+
+    // If we found an undef first lane then bail out to keep things simple.
+    if (!Item[0].first)
+      return false;
+
+    // Look for an identity value.
+    if (Item[0].second == 0 && Item[0].first->getType() == Ty &&
+        all_of(drop_begin(enumerate(Item)), [&](const auto &E) {
+          return !E.value().first || (E.value().first == Item[0].first &&
+                                      E.value().second == (int)E.index());
+        })) {
+      IdentityLeafs.insert(Item[0].first);
+      continue;
+    }
+    // Look for a splat value.
+    if (all_of(drop_begin(Item), [&](InstLane &IL) {
+          return !IL.first ||
+                 (IL.first == Item[0].first && IL.second == Item[0].second);
+        })) {
+      SplatLeafs.insert(Item[0].first);
+      continue;
+    }
+
+    // We need each element to be the same type of value, and check that each
+    // element has a single use.
+    if (!all_of(drop_begin(Item), [&](InstLane IL) {
+          if (!IL.first)
+            return true;
+          if (isa<Instruction>(IL.first) &&
+              !cast<Instruction>(IL.first)->hasOneUse())
+            return false;
+          return IL.first->getValueID() == Item[0].first->getValueID() &&
+                 (!isa<IntrinsicInst>(IL.first) ||
+                  cast<IntrinsicInst>(IL.first)->getIntrinsicID() ==
+                      cast<IntrinsicInst>(Item[0].first)->getIntrinsicID());
+        }))
+      return false;
+
+    // Check the operator is one that we support.
+    if (isa<BinaryOperator>(Item[0].first)) {
+      Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
+      Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 1));
+    } else if (isa<UnaryOperator>(Item[0].first)) {
+      Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
+    } else {
+      return false;
+    }
+  }
+
+  // If we got this far, we know the shuffles are superfluous and can be
+  // removed. Scan through again and generate the new tree of instructions.
+  std::function<Value *(const SmallVector<InstLane> &)> generate =
+      [&](const SmallVector<InstLane> &Item) -> Value * {
+    if (IdentityLeafs.contains(Item[0].first) &&
+        all_of(drop_begin(enumerate(Item)), [&](const auto &E) {
+          return !E.value().first || (E.value().first == Item[0].first &&
+                                      E.value().second == (int)E.index());
+        })) {
+      return Item[0].first;
+    } else if (SplatLeafs.contains(Item[0].first)) {
+      if (auto ILI = dyn_cast<Instruction>(Item[0].first))
+        Builder.SetInsertPoint(*ILI->getInsertionPointAfterDef());
+      else if (isa<Argument>(Item[0].first))
+        Builder.SetInsertPointPastAllocas(I.getParent()->getParent());
+      SmallVector<int, 16> Mask(Ty->getNumElements(), Item[0].second);
+      return Builder.CreateShuffleVector(Item[0].first, Mask);
+    }
+
+    auto *I = cast<Instruction>(Item[0].first);
+    SmallVector<Value *> Ops;
+    unsigned E = I->getNumOperands();
+    for (unsigned Idx = 0; Idx < E; Idx++)
+      Ops.push_back(generate(GenerateInstLaneVectorFromOperand(Item, Idx)));
+    Builder.SetInsertPoint(I);
+    if (auto BI = dyn_cast<BinaryOperator>(I))
+      return Builder.CreateBinOp((Instruction::BinaryOps)BI->getOpcode(),
+                                 Ops[0], Ops[1]);
+    if (auto UI = dyn_cast<UnaryOperator>(I))
+      return Builder.CreateUnOp((Instruction::UnaryOps)UI->getOpcode(), Ops[0]);
+    llvm_unreachable("Unhandled instruction in generate");
+  };
+
+  Value *V = generate(Start);
+  replaceValue(I, *V);
+  return true;
+}
+
 /// Given a commutative reduction, the order of the input lanes does not alter
 /// the results. We can use this to remove certain shuffles feeding the
 /// reduction, removing the need to shuffle at all.
@@ -2103,6 +2246,7 @@ bool VectorCombine::run() {
         MadeChange |= foldShuffleOfBinops(I);
         MadeChange |= foldShuffleOfCastops(I);
         MadeChange |= foldSelectShuffle(I);
+        MadeChange |= foldShuffleToIdentity(I);
         break;
       case Instruction::BitCast:
         MadeChange |= foldBitcastShuffle(I);
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
index 56b42380a68bfb..43ece88256acb9 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
@@ -5,10 +5,7 @@ target triple = "aarch64"
 
 define <8 x i8> @trivial(<8 x i8> %a) {
 ; CHECK-LABEL: @trivial(
-; CHECK-NEXT:    [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i8> [[AT]], <4 x i8> [[AB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <8 x i8> [[R]]
+; CHECK-NEXT:    ret <8 x i8> [[A:%.*]]
 ;
   %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   %at = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
@@ -18,13 +15,7 @@ define <8 x i8> @trivial(<8 x i8> %a) {
 
 define <8 x i8> @add(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-LABEL: @add(
-; CHECK-NEXT:    [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:    [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:    [[ABT:%.*]] = add <4 x i8> [[AT]], [[BT]]
-; CHECK-NEXT:    [[ABB:%.*]] = add <4 x i8> [[AB]], [[BB]]
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = add <8 x i8> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    ret <8 x i8> [[R]]
 ;
   %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -83,13 +74,7 @@ define <8 x i8> @wrong_lanes(<8 x i8> %a, <8 x i8> %b) {
 
 define <8 x half> @fadd(<8 x half> %a, <8 x half> %b) {
 ; CHECK-LABEL: @fadd(
-; CHECK-NEXT:    [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:    [[BB:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[BT:%.*]] = shufflevector <8 x half> [[B]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:    [[ABT:%.*]] = fadd <4 x half> [[AT]], [[BT]]
-; CHECK-NEXT:    [[ABB:%.*]] = fadd <4 x half> [[AB]], [[BB]]
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = fadd <8 x half> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    ret <8 x half> [[R]]
 ;
   %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -104,11 +89,7 @@ define <8 x half> @fadd(<8 x half> %a, <8 x half> %b) {
 
 define <8 x half> @fneg(<8 x half> %a, <8 x half> %b) {
 ; CHECK-LABEL: @fneg(
-; CHECK-NEXT:    [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:    [[ABT:%.*]] = fneg <4 x half> [[AT]]
-; CHECK-NEXT:    [[ABB:%.*]] = fneg <4 x half> [[AB]]
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = fneg <8 x half> [[A:%.*]]
 ; CHECK-NEXT:    ret <8 x half> [[R]]
 ;
   %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -138,12 +119,8 @@ define <8 x i8> @abs(<8 x i8> %a) {
 
 define <8 x half> @splat0(<8 x half> %a, <8 x half> %b) {
 ; CHECK-LABEL: @splat0(
-; CHECK-NEXT:    [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:    [[BS:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[ABT:%.*]] = fadd <4 x half> [[AT]], [[BS]]
-; CHECK-NEXT:    [[ABB:%.*]] = fadd <4 x half> [[AB]], [[BS]]
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = fadd <8 x half> [[A:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <8 x half> [[R]]
 ;
   %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -157,12 +134,8 @@ define <8 x half> @splat0(<8 x half> %a, <8 x half> %b) {
 
 define <8 x half> @splat2(<8 x half> %a, <8 x half> %b) {
 ; CHECK-LABEL: @splat2(
-; CHECK-NEXT:    [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:    [[BS:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEXT:    [[ABT:%.*]] = fadd <4 x half> [[AT]], [[BS]]
-; CHECK-NEXT:    [[ABB:%.*]] = fadd <4 x half> [[AB]], [[BS]]
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[R:%.*]] = fadd <8 x half> [[A:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <8 x half> [[R]]
 ;
   %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -176,12 +149,8 @@ define <8 x half> @splat2(<8 x half> %a, <8 x half> %b) {
 
 define <8 x half> @splatandidentity(<8 x half> %a, <8 x half> %b) {
 ; CHECK-LABEL: @splatandidentity(
-; CHECK-NEXT:    [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:    [[BS:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[ABT:%.*]] = fadd <4 x half> [[AT]], [[BS]]
-; CHECK-NEXT:    [[ABB:%.*]] = fadd <4 x half> [[AB]], [[BS]]
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = fadd <8 x half> [[A]], [[TMP1]]
 ; CHECK-NEXT:    ret <8 x half> [[R]]
 ;
   %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -195,11 +164,9 @@ define <8 x half> @splatandidentity(<8 x half> %a, <8 x half> %b) {
 
 define <8 x half> @splattwice(<8 x half> %a, <8 x half> %b) {
 ; CHECK-LABEL: @splattwice(
-; CHECK-NEXT:    [[AS:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BS:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[AB1:%.*]] = fadd <4 x half> [[AS]], [[BS]]
-; CHECK-NEXT:    [[AB2:%.*]] = fadd <4 x half> [[AS]], [[BS]]
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x half> [[AB1]], <4 x half> [[AB2]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = fadd <8 x half> [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    ret <8 x half> [[R]]
 ;
   %as = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> zeroinitializer
@@ -212,13 +179,7 @@ define <8 x half> @splattwice(<8 x half> %a, <8 x half> %b) {
 
 define <8 x i8> @undeflane(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-LABEL: @undeflane(
-; CHECK-NEXT:    [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:    [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:    [[ABT:%.*]] = add <4 x i8> [[AT]], [[BT]]
-; CHECK-NEXT:    [[ABB:%.*]] = add <4 x i8> [[AB]], [[BB]]
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 poison, i32 1, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = add <8 x i8> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    ret <8 x i8> [[R]]
 ;
   %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -284,18 +245,9 @@ define <8 x i8> @constantdiff2(<8 x i8> %a) {
 
 define <8 x i8> @inner_shuffle(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
 ; CHECK-LABEL: @inner_shuffle(
-; CHECK-NEXT:    [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[CS:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[ABT:%.*]] = mul <4 x i8> [[AT]], [[BT]]
-; CHECK-NEXT:    [[ABB:%.*]] = mul <4 x i8> [[AB]], [[BB]]
-; CHECK-NEXT:    [[ABT2:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[ABB2:%.*]] = shufflevector <4 x i8> [[ABB]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[ABT3:%.*]] = add <4 x i8> [[ABT2]], [[CS]]
-; CHECK-NEXT:    [[ABB3:%.*]] = add <4 x i8> [[ABB2]], [[CS]]
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i8> [[ABT3]], <4 x i8> [[ABB3]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = mul <8 x i8> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = add <8 x i8> [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    ret <8 x i8> [[R]]
 ;
   %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -338,14 +290,9 @@ define <8 x i8> @extrause_add(<8 x i8> %a, <8 x i8> %b) {
 
 define <8 x i8> @extrause_shuffle(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-LABEL: @extrause_shuffle(
-; CHECK-NEXT:    [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:    [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
+; CHECK-NEXT:    [[BT:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
 ; CHECK-NEXT:    call void @use(<4 x i8> [[BT]])
-; CHECK-NEXT:    [[ABT:%.*]] = add <4 x i8> [[AT]], [[BT]]
-; CHECK-NEXT:    [[ABB:%.*]] = add <4 x i8> [[AB]], [[BB]]
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = add <8 x i8> [[A:%.*]], [[B]]
 ; CHECK-NEXT:    ret <8 x i8> [[R]]
 ;
   %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -460,7 +407,7 @@ define <8 x i8> @intrinsics_minmax(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-NEXT:    [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
 ; CHECK-NEXT:    [[ABT:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[AT]], <4 x i8> [[BT]])
 ; CHECK-NEXT:    [[ABB:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[AB]], <4 x i8> [[BB]])
-; CHECK-NEXT:    [[ABT1:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[ABT]], <4 x i8> [[BT]])
+; CHECK-NEXT:    [[ABT1:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[ABT]], <4 x i8> [[BT]])
 ; CHECK-NEXT:    [[ABB1:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[ABB]], <4 x i8> [[BB]])
 ; CHECK-NEXT:    [[ABT2:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[ABT1]], <4 x i8> [[BT]])
 ; CHECK-NEXT:    [[ABB2:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[ABB1]], <4 x i8> [[BB]])
@@ -475,7 +422,7 @@ define <8 x i8> @intrinsics_minmax(<8 x i8> %a, <8 x i8> %b) {
   %bt = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
   %abt = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %at, <4 x i8> %bt)
   %abb = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %ab, <4 x i8> %bb)
-  %abt1 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %abt, <4 x i8> %bt)
+  %abt1 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %abt, <4 x i8> %bt)
   %abb1 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %abb, <4 x i8> %bb)
   %abt2 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> %abt1, <4 x i8> %bt)
   %abb2 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> %abb1, <4 x i8> %bb)
@@ -543,53 +490,16 @@ define void @v8f64interleave(i64 %0, ptr %1, ptr %x, double %z) {
 ; CHECK-LABEL: @v8f64interleave(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[Z:%.*]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <16 x i32> zeroinitializer
 ; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <16 x double>, ptr [[TMP1:%.*]], align 8
-; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 0, i32 8>
-; CHECK-NEXT:    [[STRIDED_VEC27:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 1, i32 9>
-; CHECK-NEXT:    [[STRIDED_VEC28:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 2, i32 10>
-; CHECK-NEXT:    [[STRIDED_VEC29:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 3, i32 11>
-; CHECK-NEXT:    [[STRIDED_VEC30:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 4, i32 12>
-; CHECK-NEXT:    [[STRIDED_VEC31:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 5, i32 13>
-; CHECK-NEXT:    [[STRIDED_VEC32:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 6, i32 14>
-; CHECK-NEXT:    [[STRIDED_VEC33:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 7, i32 15>
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <2 x double> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds double, ptr [[X:%.*]], i64 [[TMP0:%.*]]
-; CHECK-NEXT:    [[WIDE_VEC34:%.*]] = load <16 x double>, ptr [[TMP3]], align 8
-; CHECK-NEXT:    [[STRIDED_VEC35:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 0, i32 8>
-; CHECK-NEXT:    [[STRIDED_VEC36:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 1, i32 9>
-; CHECK-NEXT:    [[STRIDED_VEC37:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 2, i32 10>
-; CHECK-NEXT:    [[STRIDED_VEC38:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 3, i32 11>
-; CHECK-NEXT:    [[STRIDED_VEC39:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 4, i32 12>
-; CHECK-NEXT:    [[STRIDED_VEC40:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 5, i32 13>
-; CHECK-NEXT:    [[STRIDED_VEC41:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 6, i32 14>
-; CHECK-NEXT:    [[STRIDED_VEC42:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 7, i32 15>
-; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast <2 x double> [[STRIDED_VEC35]], [[TMP2]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast <2 x double> [[STRIDED_VEC27]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP6:%.*]] = fadd fast <2 x double> [[STRIDED_VEC36]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul fast <2 x double> [[STRIDED_VEC28]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP8:%.*]] = fadd fast <2 x double> [[STRIDED_VEC37]], [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast <2 x double> [[STRIDED_VEC29]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP10:%.*]] = fadd fast <2 x double> [[STRIDED_VEC38]], [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = fmul fast <2 x double> [[STRIDED_VEC30]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP12:%.*]] = fadd fast <2 x double> [[STRIDED_VEC39]], [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = fmul fast <2 x double> [[STRIDED_VEC31]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP14:%.*]] = fadd fast <2 x double> [[STRIDED_VEC40]], [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = fmul fast <2 x double> [[STRIDED_VEC32]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP16:%.*]] = fadd fast <2 x double> [[STRIDED_VEC41]], [[TMP15]]
-; CHECK-NEXT:    [[TMP17:%.*]] = or disjoint i64 [[TMP0]], 7
-; CHECK-NEXT:    [[TMP18:%.*]] = fmul fast <2 x double> [[STRIDED_VEC33]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP17]]
-; CHECK-NEXT:    [[TMP20:%.*]] = fadd fast <2 x double> [[STRIDED_VEC42]], [[TMP18]]
-; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i64 -56
-; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <2 x double> [[TMP12]], <2 x double> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <2 x double> [[TMP16]], <2 x double> [[TMP20]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <4 x double> [[TMP22]], <4 x double> [[TMP23]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <4 x double> [[TMP24]], <4 x double> [[TMP25]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[TMP26]], <8 x double> [[TMP27]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; CHECK-NEXT:    store <16 x double> [[INTERLEAVED_VEC]], ptr [[TMP21]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul <16 x double> [[WIDE_VEC]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds double, ptr [[X:%.*]], i64 [[TMP0:%.*]]
+; CHECK-NEXT:    [[WIDE_VEC34:%.*]] = load <16 x double>, ptr [[TMP4]], align 8
+; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = fadd <16 x double> [[WIDE_VEC34]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = or disjoint i64 [[TMP0]], 7
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 -56
+; CHECK-NEXT:    store <16 x double> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/VectorCombine/X86/pr67803.ll b/llvm/test/Transforms/VectorCombine/X86/pr67803.ll
index 69fd6f6a10e2a6..8dd3bc684a4c1c 100644
--- a/llvm/test/Transforms/VectorCombine/X86/pr67803.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/pr67803.ll
@@ -5,10 +5,7 @@
 define <4 x i64> @PR67803(<8 x i32> %x, <8 x i32> %y, <8 x float> %a, <8 x float> %b) {
 ; CHECK-LABEL: @PR67803(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <8 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[CMP_LO:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[CMP_HI:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i1> [[CMP_LO]], <4 x i1> [[CMP_HI]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt <8 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i32>
 ; CHECK-NEXT:    [[CONCAT:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64>
 ; CHECK-NEXT:    [[MASK:%.*]] = bitcast <4 x i64> [[CONCAT]] to <8 x float>
diff --git a/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll b/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll
index 685d661ea6bcd2..a14995403cfacf 100644
--- a/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll
@@ -15,8 +15,7 @@ define <4 x double> @PR60649() {
 ; CHECK-NEXT:    [[T0:%.*]] = phi <4 x double> [ zeroinitializer, [[ENTRY:%.*]] ], [ zeroinitializer, [[UNREACHABLE:%.*]] ]
 ; CHECK-NEXT:    [[T1:%.*]] = phi <4 x double> [ zeroinitializer, [[ENTRY]] ], [ zeroinitializer, [[UNREACHABLE]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x double> [[T0]], <4 x double> [[T0]], <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[T0]], <4 x double> [[T0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP2:%.*]] = fdiv <4 x double> [[TMP1]], <double 0.000000e+00, double 0.000000e+00, double undef, double undef>
+; CHECK-NEXT:    [[TMP2:%.*]] = fdiv <4 x double> [[T0]], <double 0.000000e+00, double 0.000000e+00, double undef, double undef>
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <4 x double> [[TMP0]], <double 0.000000e+00, double 0.000000e+00, double undef, double undef>
 ; CHECK-NEXT:    [[T5:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; CHECK-NEXT:    ret <4 x double> [[T5]]
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll
index b5b5bb997c6c7a..a8116b39ac3560 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll
@@ -7,10 +7,7 @@
 define <8 x i32> @concat_extract_subvectors(<8 x i32> %x) {
 ; CHECK-LABEL: define <8 x i32> @concat_extract_subvectors(
 ; CHECK-SAME: <8 x i32> [[X:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    [[LO:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[HI:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[CONCAT:%.*]] = shufflevector <4 x i32> [[LO]], <4 x i32> [[HI]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    ret <8 x i32> [[CONCAT]]
+; CHECK-NEXT:    ret <8 x i32> [[X]]
 ;
   %lo = shufflevector <8 x i32> %x, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %hi = shufflevector <8 x i32> %x, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>

>From 64b4bdec2e92f4228950533f9bc0efa35c502081 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Wed, 17 Apr 2024 13:45:02 +0100
Subject: [PATCH 2/3] Address comments and fix failing test

---
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index d4f23373f562b4..28286140cd7fc3 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1552,7 +1552,7 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
 // of each lane. If we can simplify away the shuffles to identities then
 // do so.
 bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
-  FixedVectorType *Ty = dyn_cast<FixedVectorType>(I.getType());
+  auto *Ty = dyn_cast<FixedVectorType>(I.getType());
   if (!Ty || !isa<Instruction>(I.getOperand(0)) ||
       !isa<Instruction>(I.getOperand(1)))
     return false;
@@ -1565,7 +1565,7 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
           cast<FixedVectorType>(SV->getOperand(0)->getType())->getNumElements();
       int M = SV->getMaskValue(Lane);
       if (M < 0)
-        return {nullptr, -1};
+        return {nullptr, PoisonMaskElem};
       else if (M < (int)NumElts) {
         V = SV->getOperand(0);
         Lane = M;
@@ -1578,12 +1578,12 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
   };
 
   auto GenerateInstLaneVectorFromOperand =
-      [&LookThroughShuffles](const SmallVector<InstLane> &Item, int Op) {
+      [&LookThroughShuffles](ArrayRef<InstLane> Item, int Op) {
         SmallVector<InstLane> NItem;
         for (InstLane V : Item) {
           NItem.emplace_back(
               !V.first
-                  ? InstLane{nullptr, -1}
+                  ? InstLane{nullptr, PoisonMaskElem}
                   : LookThroughShuffles(
                         cast<Instruction>(V.first)->getOperand(Op), V.second));
         }
@@ -1631,8 +1631,7 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
     if (!all_of(drop_begin(Item), [&](InstLane IL) {
           if (!IL.first)
             return true;
-          if (isa<Instruction>(IL.first) &&
-              !cast<Instruction>(IL.first)->hasOneUse())
+          if (auto *I = dyn_cast<Instruction>(IL.first); I && !I->hasOneUse())
             return false;
           return IL.first->getValueID() == Item[0].first->getValueID() &&
                  (!isa<IntrinsicInst>(IL.first) ||
@@ -1654,8 +1653,8 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
 
   // If we got this far, we know the shuffles are superfluous and can be
   // removed. Scan through again and generate the new tree of instructions.
-  std::function<Value *(const SmallVector<InstLane> &)> generate =
-      [&](const SmallVector<InstLane> &Item) -> Value * {
+  std::function<Value *(ArrayRef<InstLane>)> generate =
+      [&](ArrayRef<InstLane> Item) -> Value * {
     if (IdentityLeafs.contains(Item[0].first) &&
         all_of(drop_begin(enumerate(Item)), [&](const auto &E) {
           return !E.value().first || (E.value().first == Item[0].first &&

>From 7babf9e8d713a9ef7b8dd014609bc6848bf0ff9e Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 22 Apr 2024 08:42:02 +0100
Subject: [PATCH 3/3] Rebase and update phase-ordering tests

---
 .../AArch64/interleavevectorization.ll        | 43 +++++--------------
 1 file changed, 10 insertions(+), 33 deletions(-)

diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll
index b49c0ad9976654..2ceba48a6a5d5c 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll
@@ -22,23 +22,13 @@ define void @add4(ptr noalias noundef %x, ptr noalias noundef %y, i32 noundef %n
 ; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <32 x i16>, ptr [[TMP0]], align 2
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[OFFSET_IDX]]
 ; CHECK-NEXT:    [[WIDE_VEC24:%.*]] = load <32 x i16>, ptr [[TMP1]], align 2
-; CHECK-NEXT:    [[TMP2:%.*]] = add <32 x i16> [[WIDE_VEC24]], [[WIDE_VEC]]
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
-; CHECK-NEXT:    [[TMP4:%.*]] = add <32 x i16> [[WIDE_VEC24]], [[WIDE_VEC]]
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <32 x i16> [[TMP4]], <32 x i16> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
-; CHECK-NEXT:    [[TMP6:%.*]] = add <32 x i16> [[WIDE_VEC24]], [[WIDE_VEC]]
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <32 x i16> [[TMP6]], <32 x i16> poison, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
-; CHECK-NEXT:    [[TMP8:%.*]] = or disjoint i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT:    [[TMP9:%.*]] = add <32 x i16> [[WIDE_VEC24]], [[WIDE_VEC]]
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <32 x i16> [[TMP9]], <32 x i16> poison, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i16, ptr [[INVARIANT_GEP]], i64 [[TMP8]]
-; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <8 x i16> [[TMP7]], <8 x i16> [[TMP10]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i16> [[TMP11]], <16 x i16> [[TMP12]], <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
+; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = add <32 x i16> [[WIDE_VEC24]], [[WIDE_VEC]]
+; CHECK-NEXT:    [[TMP2:%.*]] = or disjoint i64 [[OFFSET_IDX]], 3
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i16, ptr [[INVARIANT_GEP]], i64 [[TMP2]]
 ; CHECK-NEXT:    store <32 x i16> [[INTERLEAVED_VEC]], ptr [[GEP]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
-; CHECK-NEXT:    br i1 [[TMP13]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
+; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
 ;
@@ -424,26 +414,13 @@ define void @addmul(ptr noalias noundef %x, ptr noundef %y, ptr noundef %z, i32
 ; CHECK-NEXT:    [[TMP2:%.*]] = mul <32 x i16> [[WIDE_VEC31]], [[WIDE_VEC]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[OFFSET_IDX]]
 ; CHECK-NEXT:    [[WIDE_VEC36:%.*]] = load <32 x i16>, ptr [[TMP3]], align 2
-; CHECK-NEXT:    [[TMP4:%.*]] = add <32 x i16> [[TMP2]], [[WIDE_VEC36]]
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <32 x i16> [[TMP4]], <32 x i16> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
-; CHECK-NEXT:    [[TMP6:%.*]] = mul <32 x i16> [[WIDE_VEC31]], [[WIDE_VEC]]
-; CHECK-NEXT:    [[TMP7:%.*]] = add <32 x i16> [[TMP6]], [[WIDE_VEC36]]
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <32 x i16> [[TMP7]], <32 x i16> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
-; CHECK-NEXT:    [[TMP9:%.*]] = mul <32 x i16> [[WIDE_VEC31]], [[WIDE_VEC]]
-; CHECK-NEXT:    [[TMP10:%.*]] = add <32 x i16> [[TMP9]], [[WIDE_VEC36]]
-; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <32 x i16> [[TMP10]], <32 x i16> poison, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
-; CHECK-NEXT:    [[TMP12:%.*]] = or disjoint i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT:    [[TMP13:%.*]] = mul <32 x i16> [[WIDE_VEC31]], [[WIDE_VEC]]
-; CHECK-NEXT:    [[TMP14:%.*]] = add <32 x i16> [[TMP13]], [[WIDE_VEC36]]
-; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <32 x i16> [[TMP14]], <32 x i16> poison, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i16, ptr [[INVARIANT_GEP]], i64 [[TMP12]]
-; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <8 x i16> [[TMP5]], <8 x i16> [[TMP8]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <8 x i16> [[TMP11]], <8 x i16> [[TMP15]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i16> [[TMP16]], <16 x i16> [[TMP17]], <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
+; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = add <32 x i16> [[TMP2]], [[WIDE_VEC36]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or disjoint i64 [[OFFSET_IDX]], 3
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i16, ptr [[INVARIANT_GEP]], i64 [[TMP4]]
 ; CHECK-NEXT:    store <32 x i16> [[INTERLEAVED_VEC]], ptr [[GEP]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
-; CHECK-NEXT:    br i1 [[TMP18]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
+; CHECK-NEXT:    br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
 ;



More information about the llvm-commits mailing list